Skip to main content

dbmd_core/
parser.rs

1//! `parser` — read and write db.md markdown files.
2//!
3//! Parses the YAML frontmatter block, the markdown body, wiki-links, standard
4//! markdown links, `##` sections, and the structured sections of the `DB.md`
5//! config file. Also the atomic writer that round-trips a file while
6//! preserving the operator-edited body verbatim and emitting frontmatter in
7//! canonical key order.
8//!
9//! Strict on required fields, lenient on unknowns: any frontmatter key the
10//! spec doesn't recognize is preserved in [`Frontmatter::extra`] as ambient
11//! context and round-tripped untouched.
12
13use std::collections::BTreeMap;
14use std::path::{Path, PathBuf};
15
16use chrono::{DateTime, FixedOffset};
17use serde_norway::{Mapping, Value};
18
/// The three canonical layer folder names. A path is "content" / a wiki-link is
/// "full-path" only when it resolves under one of these.
///
/// `Frontmatter::is_content_file` compares each path component against these
/// names for exact equality.
const LAYER_DIRS: [&str; 3] = ["sources", "records", "wiki"];
22
/// Errors produced while parsing a markdown file or the `DB.md` config.
///
/// Every variant except [`ParseError::Io`] carries the path of the file it
/// concerns, so the rendered message can name it.
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
    /// The frontmatter block was not valid YAML. Maps to validate code
    /// `FM_MALFORMED_YAML`.
    ///
    /// [`Frontmatter::parse`] also returns this when the YAML is well-formed
    /// but its top level cannot be read as a mapping (a bare scalar or
    /// sequence).
    #[error("malformed YAML frontmatter in {file}: {source}")]
    MalformedYaml {
        /// The file whose frontmatter failed to parse.
        file: PathBuf,
        /// The underlying YAML error.
        source: serde_norway::Error,
    },

    /// The file has no `---`-delimited frontmatter block at its very start.
    ///
    /// [`split_frontmatter`] returns this both when the opening fence is
    /// absent and when an opening fence is never closed.
    #[error("missing frontmatter block in {file}")]
    MissingFrontmatter {
        /// The offending file.
        file: PathBuf,
    },

    /// A required field was absent. Maps to validate code `FM_MISSING_TYPE`
    /// (for `type`) and the per-type required-field codes.
    // NOTE(review): not constructed in the portion of this file reviewed here;
    // presumably raised by a caller outside this section — confirm.
    #[error("missing required field '{key}' in {file}")]
    MissingField {
        /// The file missing the field.
        file: PathBuf,
        /// The required key.
        key: String,
    },

    /// A timestamp field was not ISO-8601 / RFC3339. Maps to `FM_BAD_TIMESTAMP`.
    #[error("bad timestamp in field '{key}' of {file}: {value}")]
    BadTimestamp {
        /// The file.
        file: PathBuf,
        /// The frontmatter key.
        key: String,
        /// The unparseable value.
        value: String,
    },

    /// An I/O error reading the file. The `#[from]` conversion is what lets
    /// `read_file` / `write_file` propagate `std::io::Error` with `?`.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
68
/// The parsed YAML frontmatter of a db.md file.
///
/// The universal-contract fields are typed accessors; everything else lands in
/// [`extra`](Frontmatter::extra) as ambient context (unknown-field passthrough)
/// and is round-tripped verbatim. The atomic writer re-emits keys in canonical
/// order: `type`, `id`, `created`, `updated`, `summary` first, then
/// type-specific fields, then `status` / `tags`.
///
/// `Default` yields a frontmatter with every field unset / empty.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Frontmatter {
    /// `type` — required on content files; the primary query key.
    pub type_: Option<String>,
    /// `id` — optional; derived from the file path when absent.
    pub id: Option<String>,
    /// `created` — RFC3339; required and auto-set on content-file create.
    pub created: Option<DateTime<FixedOffset>>,
    /// `updated` — RFC3339; required and auto-maintained on content files.
    pub updated: Option<DateTime<FixedOffset>>,
    /// `summary` — the one-line catalog line; required on every content file.
    pub summary: Option<String>,
    /// `status` — optional lifecycle state.
    pub status: Option<String>,
    /// `tags` — optional flat list of short scalar labels. An empty vec means
    /// "no `tags` key": the serializer omits the key entirely in that case.
    pub tags: Vec<String>,
    /// All other frontmatter keys (type-specific + custom), preserved verbatim.
    /// Being a `BTreeMap`, iteration (and therefore re-emission) is sorted by
    /// key, not by the order the keys appeared in the source file.
    /// Wiki-link-valued fields keep their raw YAML form here;
    /// [`Frontmatter::link_fields`] surfaces them as [`WikiLink`]s.
    pub extra: BTreeMap<String, Value>,
}
98
99impl Frontmatter {
100    /// Parse a YAML frontmatter block (the text between the opening and closing
101    /// `---` fences, exclusive) into a [`Frontmatter`].
102    ///
103    /// Lenient on unknown keys (they go to [`extra`](Frontmatter::extra));
104    /// returns [`ParseError::MalformedYaml`] only on YAML that doesn't parse.
105    pub fn parse(yaml: &str, file: &Path) -> Result<Self, ParseError> {
106        // An empty (or whitespace-only) frontmatter block is a valid, empty
107        // mapping — not a YAML error.
108        let value: Value = if yaml.trim().is_empty() {
109            Value::Mapping(Mapping::new())
110        } else {
111            serde_norway::from_str(yaml).map_err(|source| ParseError::MalformedYaml {
112                file: file.to_path_buf(),
113                source,
114            })?
115        };
116
117        // Top-level frontmatter must be a mapping. A scalar or sequence at the
118        // top level is malformed for our purposes; surface it as such.
119        let map = match value {
120            Value::Mapping(m) => m,
121            Value::Null => Mapping::new(),
122            other => {
123                // serde_norway::Error has no public constructor, so let the
124                // deserializer decide: a value that coerces to a Mapping (e.g. a
125                // YAML-tagged mapping `!tag\n k: v`, where the tag is ambient) is
126                // accepted as that mapping; a genuine scalar or sequence top
127                // level fails to coerce and IS the malformed case. (Using a
128                // match here, not `expect_err`, avoids a panic on the
129                // tagged-mapping case, which deserializes to a Mapping just
130                // fine.)
131                match serde_norway::from_value::<Mapping>(other) {
132                    Ok(m) => m,
133                    Err(source) => {
134                        return Err(ParseError::MalformedYaml {
135                            file: file.to_path_buf(),
136                            source,
137                        });
138                    }
139                }
140            }
141        };
142
143        let mut fm = Frontmatter::default();
144        for (k, v) in map {
145            let key = match k.as_str() {
146                Some(s) => s.to_string(),
147                // Non-string keys are unusual; stringify defensively and keep
148                // them in `extra` so nothing is silently dropped.
149                None => format!("{k:?}"),
150            };
151            match key.as_str() {
152                // Coerce scalar values rather than `v.as_str()` (which is None
153                // for Number/Bool/Null). A bare scalar that YAML reads as a
154                // non-string — `summary: 2026`, `id: 100`, `status: 0` — would
155                // otherwise be set to None AND dropped (it is a matched arm, so
156                // the raw value never reaches `extra`), and `to_yaml` then omits
157                // the None field, so `dbmd format` (read_file -> write_file)
158                // silently deletes the line from disk. `scalar_string` mirrors
159                // the coercion `validate`/`store` already apply to these fields,
160                // so a numeric/bool-looking scalar is preserved as its string
161                // form and round-trips instead of being destroyed.
162                "type" => fm.type_ = scalar_string(&v),
163                "id" => fm.id = scalar_string(&v),
164                "created" => fm.created = parse_timestamp(&v, "created", file)?,
165                "updated" => fm.updated = parse_timestamp(&v, "updated", file)?,
166                "summary" => fm.summary = scalar_string(&v),
167                "status" => fm.status = scalar_string(&v),
168                "tags" => fm.tags = parse_tags(&v),
169                _ => {
170                    fm.extra.insert(key, v);
171                }
172            }
173        }
174        Ok(fm)
175    }
176
177    /// Serialize the frontmatter back to a YAML block (no `---` fences) in
178    /// canonical key order. Round-trips [`extra`](Frontmatter::extra) verbatim.
179    pub fn to_yaml(&self) -> String {
180        // Build an order-preserving mapping in canonical key order:
181        //   type, id, created, updated, summary  (universal head)
182        //   <type-specific extra, BTreeMap-sorted>
183        //   status, tags                          (universal tail)
184        // serde_norway::Mapping preserves insertion order, so one serialize call
185        // emits the block in exactly this order with correct YAML quoting.
186        let mut map = Mapping::new();
187
188        if let Some(t) = &self.type_ {
189            map.insert(Value::String("type".into()), Value::String(t.clone()));
190        }
191        if let Some(id) = &self.id {
192            map.insert(Value::String("id".into()), Value::String(id.clone()));
193        }
194        if let Some(created) = &self.created {
195            map.insert(
196                Value::String("created".into()),
197                Value::String(created.to_rfc3339()),
198            );
199        }
200        if let Some(updated) = &self.updated {
201            map.insert(
202                Value::String("updated".into()),
203                Value::String(updated.to_rfc3339()),
204            );
205        }
206        if let Some(summary) = &self.summary {
207            map.insert(
208                Value::String("summary".into()),
209                Value::String(summary.clone()),
210            );
211        }
212
213        // Type-specific + custom fields, in BTreeMap (sorted) order. Each value
214        // is canonicalized so a wiki-link round-trips to the form the writer and
215        // `dbmd validate` agree on — critically, the SPEC-canonical *unquoted*
216        // scalar `field: [[x]]` (which YAML parses to a nested `Seq[Seq[String]]`)
217        // is re-emitted as a quoted scalar `'[[x]]'` instead of the bracket-less
218        // block sequence `- - x` that a verbatim re-emit would produce and that
219        // destroys the link. See [`canonicalize_extra_value`].
220        for (k, v) in &self.extra {
221            map.insert(Value::String(k.clone()), canonicalize_extra_value(v));
222        }
223
224        if let Some(status) = &self.status {
225            map.insert(
226                Value::String("status".into()),
227                Value::String(status.clone()),
228            );
229        }
230        if !self.tags.is_empty() {
231            map.insert(
232                Value::String("tags".into()),
233                Value::Sequence(self.tags.iter().cloned().map(Value::String).collect()),
234            );
235        }
236
237        if map.is_empty() {
238            return String::new();
239        }
240        serde_norway::to_string(&Value::Mapping(map)).unwrap_or_default()
241    }
242
243    /// True if the file is content (under `sources/`, `records/`, or `wiki/`)
244    /// and not an `index.md`. Used by validate to decide which files require a
245    /// `summary`. Meta files (`DB.md`, `index.md`, `log.md`) return false.
246    pub fn is_content_file(path: &Path) -> bool {
247        // index.md is a meta file at every level, never content.
248        if path.file_name().and_then(|n| n.to_str()) == Some("index.md") {
249            return false;
250        }
251        // Content iff some path component is one of the three layer dirs. This
252        // works for both store-relative (`sources/emails/x.md`) and absolute
253        // (`/home/db/sources/emails/x.md`) paths. DB.md / log.md sit at the
254        // root, under no layer, so they fall through to false.
255        path.components().any(|c| {
256            c.as_os_str()
257                .to_str()
258                .is_some_and(|s| LAYER_DIRS.contains(&s))
259        })
260    }
261
262    /// Resolve the file's effective `id`: the explicit `id` field if present,
263    /// otherwise derived from the store-relative path (filename without `.md`).
264    pub fn effective_id(&self, store_relative_path: &Path) -> String {
265        if let Some(id) = &self.id {
266            if !id.is_empty() {
267                return id.clone();
268            }
269        }
270        // Derived id = filename without the `.md` extension.
271        store_relative_path
272            .file_stem()
273            .and_then(|s| s.to_str())
274            .unwrap_or_default()
275            .to_string()
276    }
277
278    /// Read a single frontmatter key as a raw YAML [`Value`], looking in the
279    /// typed fields first and then [`extra`](Frontmatter::extra).
280    pub fn get(&self, key: &str) -> Option<Value> {
281        match key {
282            "type" => self.type_.clone().map(Value::String),
283            "id" => self.id.clone().map(Value::String),
284            "created" => self.created.map(|d| Value::String(d.to_rfc3339())),
285            "updated" => self.updated.map(|d| Value::String(d.to_rfc3339())),
286            "summary" => self.summary.clone().map(Value::String),
287            "status" => self.status.clone().map(Value::String),
288            "tags" => {
289                if self.tags.is_empty() {
290                    None
291                } else {
292                    Some(Value::Sequence(
293                        self.tags.iter().cloned().map(Value::String).collect(),
294                    ))
295                }
296            }
297            _ => self.extra.get(key).cloned(),
298        }
299    }
300
301    /// Set a single frontmatter key from a string value, routing universal-
302    /// contract keys to their typed fields and everything else to
303    /// [`extra`](Frontmatter::extra). Used by `dbmd fm set`.
304    pub fn set(&mut self, key: &str, value: &str) -> Result<(), ParseError> {
305        match key {
306            "type" => self.type_ = Some(value.to_string()),
307            "id" => self.id = Some(value.to_string()),
308            "created" => {
309                self.created = Some(parse_rfc3339(value, "created", Path::new("<fm set>"))?)
310            }
311            "updated" => {
312                self.updated = Some(parse_rfc3339(value, "updated", Path::new("<fm set>"))?)
313            }
314            "summary" => self.summary = Some(value.to_string()),
315            "status" => self.status = Some(value.to_string()),
316            "tags" => {
317                // Accept either a YAML flow list (`[a, b]`) or a single scalar
318                // tag. Anything that parses to a sequence becomes the tag list;
319                // otherwise the whole string is one tag.
320                self.tags = match serde_norway::from_str::<Value>(value) {
321                    Ok(Value::Sequence(seq)) => parse_tags(&Value::Sequence(seq)),
322                    _ => vec![value.to_string()],
323                };
324            }
325            _ => {
326                // A custom / type-specific field. The value is a scalar string by
327                // default, but the spec's list-valued link fields (e.g.
328                // `meeting.attendees`, SPEC § Linking) must serialize as a YAML
329                // block sequence of quoted wiki-links — never the flow-form string
330                // `"[[[a]], [[b]]]"`, which `dbmd validate` rejects as
331                // `WIKI_LINK_FLOW_FORM_LIST`. When the value parses as a YAML
332                // sequence whose every item is a clean single wiki-link, store the
333                // canonical sequence so `to_yaml` emits block form. Everything else
334                // — plain text, and a single inline `[[x]]` (which YAML reads as a
335                // nested `Seq[Seq[String]]`, not a list of link strings) — stays a
336                // verbatim scalar string, preserving the prior behavior.
337                let stored = parse_link_list_value(value)
338                    .unwrap_or_else(|| Value::String(value.to_string()));
339                self.extra.insert(key.to_string(), stored);
340            }
341        }
342        Ok(())
343    }
344
345    /// Extract every frontmatter field whose value is a wiki-link (scalar
346    /// inline form or a block-sequence list), pairing each with its key. The
347    /// validate engine checks these against `(link)` schema annotations.
348    pub fn link_fields(&self) -> Vec<(String, WikiLink)> {
349        let mut out = Vec::new();
350        // `summary` may carry navigational wiki-links (spec encourages it).
351        if let Some(summary) = &self.summary {
352            for link in extract_wiki_links(summary, Path::new("")) {
353                out.push(("summary".to_string(), link));
354            }
355        }
356        // Every type-specific / custom field: a scalar wiki-link or a list of
357        // wiki-links, in either the quoted (`"[[x]]"`) or the canonical unquoted
358        // (`[[x]]`) form. See [`links_in_field_value`] for the YAML shapes.
359        for (key, value) in &self.extra {
360            for link in links_in_field_value(value) {
361                out.push((key.clone(), link));
362            }
363        }
364        out
365    }
366}
367
/// A wiki-link reference inside the store: `[[target]]` or `[[target|display]]`.
///
/// `target` is always recorded as written; [`is_full_path`](WikiLink::is_full_path)
/// flags whether it's a full store-relative path (the doctrine) versus a
/// short-form (a validation error).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiLink {
    /// The link target as written, without the `[[ ]]` and without `|display`.
    pub target: String,
    /// The optional `|display` text override. `None` when the link has no
    /// `|` part.
    pub display: Option<String>,
    /// True when `target` is a full store-relative path (contains a `/` and
    /// resolves under a known layer); false for short-form targets like
    /// `sarah-chen` — which validate reports as `WIKI_LINK_SHORT_FORM`.
    pub is_full_path: bool,
    /// True when `target` carries a trailing `.md` extension — validate warns
    /// `WIKI_LINK_HAS_EXTENSION`; the canonical writers emit the bare form.
    pub has_md_extension: bool,
    /// Where the link appears: `(file, line, col)`, 1-based line and column.
    ///
    /// The line is counted within the text handed to `extract_wiki_links`, and
    /// the file is whatever path the caller supplied (`Frontmatter::link_fields`
    /// passes an empty path for links found in `summary`).
    pub location: (PathBuf, u32, u32),
}
389
/// A standard markdown link `[text](url)` — an external reference, kept in a
/// stream separate from [`WikiLink`] so external targets are visible to the
/// toolkit without being conflated with in-store edges. Not graph-validated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownLink {
    /// The link text inside `[ ]`. The extractor's pattern does not allow
    /// brackets here, and the text may be empty.
    pub text: String,
    /// The URL or path inside `( )`. The extractor's pattern stops at the
    /// first `)` or whitespace, and the value may be empty.
    pub url: String,
    /// Where the link appears: `(file, line, col)`, 1-based.
    pub location: (PathBuf, u32, u32),
}
402
/// A `##`/`###` section of a markdown body: the heading text plus the portion
/// of the body it spans (heading line through the line before the next
/// heading of equal-or-shallower depth).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Section {
    /// The heading text (without the leading `#`s).
    pub heading: String,
    /// Heading depth (number of leading `#`s).
    pub level: u8,
    /// The 1-based line where the heading appears.
    pub line: u32,
    /// The section body, from the heading line to the next sibling-or-shallower
    /// heading (exclusive). Stored as an owned `String`, so it is a copy of
    /// that span rather than a borrow of the original body.
    pub body: String,
}
418
/// The parsed structured content of a store's `DB.md` config file.
///
/// All four parts are optional in the source; absent parts fall back to spec
/// defaults. Produced by [`parse_db_md`]. `Default` gives the all-absent
/// config: no instructions, no frozen pages, no ignored types, no schemas.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Config {
    /// Body of the `## Agent instructions` section — free-form prose passed to
    /// the agent's system prompt.
    pub agent_instructions: Option<String>,
    /// `## Policies` → `### Frozen pages`: store-relative paths the toolkit
    /// refuses to write (`POLICY_FROZEN_PAGE`). Compare against these through
    /// [`Config::frozen_match`], which normalizes both sides first.
    pub frozen_pages: Vec<PathBuf>,
    /// `## Policies` → `### Ignored types`: type names the curator never
    /// synthesizes (still readable as ambient context).
    pub ignored_types: Vec<String>,
    /// `## Schemas` → one entry per `### <type>` sub-section, keyed by the
    /// type name.
    pub schemas: BTreeMap<String, Schema>,
}
437
438impl Config {
439    /// The `### Frozen pages` entry that matches a store-relative `target`, if
440    /// any. The **single** frozen-page matcher every write surface must funnel
441    /// through so the policy is enforced identically on `write` / `fm set` /
442    /// `fm init` / `link` / `rename` / `format`.
443    ///
444    /// Comparison is normalized so a policy line and a write target match
445    /// regardless of incidental spelling differences:
446    /// - `/` path separators on every OS,
447    /// - a single leading `./` dropped,
448    /// - a trailing `.md` dropped on **both** sides — `parse_db_md` stores
449    ///   frozen entries verbatim, so an operator who writes the natural
450    ///   extensionless spelling (`records/decisions/q1`) must protect the file
451    ///   (`records/decisions/q1.md`) exactly as the `.md` spelling does.
452    ///
453    /// Returns the matched config entry verbatim (its original spelling) so the
454    /// caller can name it in the `POLICY_FROZEN_PAGE` refusal.
455    pub fn frozen_match(&self, target: &Path) -> Option<PathBuf> {
456        let want = normalize_frozen_path(target);
457        self.frozen_pages
458            .iter()
459            .find(|frozen| normalize_frozen_path(frozen) == want)
460            .cloned()
461    }
462
463    /// True if `target` (store-relative) is a frozen page. Convenience wrapper
464    /// over [`Config::frozen_match`] for callers that only need presence.
465    pub fn is_frozen(&self, target: &Path) -> bool {
466        self.frozen_match(target).is_some()
467    }
468}
469
/// Reduce a path to the canonical text used for frozen-page comparison.
///
/// The path's components are re-joined with `/`, then one leading `./` and one
/// trailing `.md` are removed if present. Policy entries and write targets are
/// both passed through here before being compared, which makes the match
/// insensitive to separator, `./` prefix, and `.md` suffix. Components that are
/// not valid UTF-8 are left out of the result.
fn normalize_frozen_path(p: &Path) -> String {
    let mut joined = String::new();
    let mut first = true;
    for part in p.components().filter_map(|c| c.as_os_str().to_str()) {
        if !first {
            joined.push('/');
        }
        joined.push_str(part);
        first = false;
    }

    let without_dot = match joined.strip_prefix("./") {
        Some(rest) => rest,
        None => joined.as_str(),
    };
    match without_dot.strip_suffix(".md") {
        Some(stem) => stem.to_owned(),
        None => without_dot.to_owned(),
    }
}
483
/// A user-declared type schema parsed from a `DB.md` `### <type>` sub-section.
/// The store's `## Schemas` is the **only** source of schema enforcement — the
/// toolkit ships no built-in or implicit per-type schema (see SPEC § Schemas).
///
/// `Default` is the empty schema: no fields, no uniqueness constraints, and no
/// `summary_template` / `shard` directive.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Schema {
    /// One [`FieldSpec`] per bulleted field line, in source order.
    pub fields: Vec<FieldSpec>,
    /// `- unique: <field>[, <field> …]` directives — each inner vec is one
    /// uniqueness constraint over the listed field(s) (compound when >1). Two
    /// records of this type whose listed values collide warn as
    /// `DUP_UNIQUE_KEY`.
    pub unique_keys: Vec<Vec<String>>,
    /// `- summary_template: <template>` directive — the `{field}` interpolation
    /// pattern `dbmd fm init` / `dbmd write` use to compose a default `summary`
    /// for this type. `None` falls back to the body's first paragraph.
    pub summary_template: Option<String>,
    /// `- shard: by-date | flat` directive — whether records of this type are
    /// date-sharded on disk (`records/<type>/<YYYY>/<MM>/…`) or kept flat.
    /// `None` = no directive declared, so the store's built-in default for the
    /// type applies ([`crate::store::Store::type_shards`]); `Some(true)` forces
    /// date-sharding (e.g. a custom event type the toolkit has no built-in for);
    /// `Some(false)` forces flat. This is the v0.2 generic-model way to declare
    /// sharding — the toolkit ships no implicit per-type behavior beyond the
    /// example-type defaults.
    pub shard: Option<bool>,
}
510
/// One field declaration inside a [`Schema`]: `- <name> (<modifiers>)`.
///
/// Modifiers are comma-separated inside the parens; this captures the
/// recognized ones as typed fields and stashes anything unrecognized in
/// [`unknown_modifiers`](FieldSpec::unknown_modifiers) (surfaced as `Info`).
///
/// `Default` is a spec with an empty name and no modifiers set.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct FieldSpec {
    /// The field name.
    pub name: String,
    /// `required` modifier present.
    pub required: bool,
    /// The shape modifier (`string`/`int`/`bool`/`date`/`email`/`currency`/
    /// `url`), if any.
    pub shape: Option<Shape>,
    /// `link to <prefix>/` — the store-relative prefix a wiki-link target must
    /// start with. The trailing slash is required in the source syntax.
    pub link_prefix: Option<PathBuf>,
    /// `default <value>` — the value written when the field is absent, kept as
    /// a raw YAML [`Value`].
    pub default: Option<Value>,
    /// `enum: <v1>, <v2>, ...` — the allowed values (must be the last modifier
    /// on the line because of its own commas).
    pub enum_values: Option<Vec<String>>,
    /// Any modifiers not in the recognized vocabulary, preserved verbatim;
    /// validate surfaces these as `Info`, never errors.
    pub unknown_modifiers: Vec<String>,
}
537
/// A recognized shape modifier for a schema field. Validate enforces the
/// corresponding value shape (`SCHEMA_SHAPE_MISMATCH` on violation).
///
/// The variants correspond one-to-one to the modifier keywords listed on
/// [`FieldSpec::shape`]: `string`, `int`, `bool`, `date`, `email`, `currency`,
/// `url`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Shape {
    /// Any scalar string.
    String,
    /// Integer.
    Int,
    /// Boolean.
    Bool,
    /// RFC3339 / ISO-8601 date.
    Date,
    /// `<local>@<domain>` email address.
    Email,
    /// A currency amount.
    Currency,
    /// A URL.
    Url,
}
557
/// The result of splitting a raw file into its frontmatter block and body.
///
/// `body` is the verbatim remainder after the closing `---` fence — the writer
/// preserves it byte-for-byte so operator edits are never reflowed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFile {
    /// The raw frontmatter YAML (between the fences, exclusive of them). Line
    /// terminators inside the block are kept as they appear in the file.
    pub frontmatter_yaml: String,
    /// The verbatim body (everything after the closing `---`), starting
    /// immediately after the closing fence's own line terminator.
    pub body: String,
}
569
570/// Split a file's full text into its frontmatter block and body. The
571/// frontmatter block must be the very first thing in the file, delimited by
572/// `---` on its own line at start and end. Returns
573/// [`ParseError::MissingFrontmatter`] if absent.
574pub fn split_frontmatter(text: &str, file: &Path) -> Result<ParsedFile, ParseError> {
575    // Tolerate a single leading UTF-8 BOM (U+FEFF) before the opening fence,
576    // matching `store::frontmatter_block` and `index::extract_frontmatter_block`
577    // which already strip it. Without this, a BOM-prefixed file (common from
578    // Windows / exported markdown dropped into `sources/`) gets walked and
579    // indexed by `dbmd index` yet hard-fails every write/edit surface that
580    // routes through `read_file` (`fm get/set`, `format`, `link`, `write`). The
581    // BOM is dropped from the emitted body so the canonical writer never carries
582    // it forward.
583    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
584
585    // The opening fence must be the very first line: `---` (optionally with a
586    // trailing CR), no leading whitespace, nothing before it.
587    let mut lines = text.split_inclusive('\n');
588    let first = lines.next().unwrap_or("");
589    if first.trim_end_matches(['\r', '\n']) != "---" {
590        return Err(ParseError::MissingFrontmatter {
591            file: file.to_path_buf(),
592        });
593    }
594
595    // Scan for the closing fence line. Track byte offsets so we can slice the
596    // YAML (between fences, exclusive) and the body (verbatim, after the
597    // closing fence's line terminator).
598    let opening_len = first.len();
599    let mut offset = opening_len;
600    for line in lines {
601        if line.trim_end_matches(['\r', '\n']) == "---" {
602            let yaml = &text[opening_len..offset];
603            let body_start = offset + line.len();
604            let body = &text[body_start..];
605            return Ok(ParsedFile {
606                frontmatter_yaml: yaml.to_string(),
607                body: body.to_string(),
608            });
609        }
610        offset += line.len();
611    }
612
613    // Opening fence present but no closing fence: malformed frontmatter block.
614    Err(ParseError::MissingFrontmatter {
615        file: file.to_path_buf(),
616    })
617}
618
619/// Read a file from disk and parse it into typed [`Frontmatter`] plus the
620/// verbatim body string.
621pub fn read_file(path: &Path) -> Result<(Frontmatter, String), ParseError> {
622    let text = std::fs::read_to_string(path)?;
623    let parsed = split_frontmatter(&text, path)?;
624    let fm = Frontmatter::parse(&parsed.frontmatter_yaml, path)?;
625    Ok((fm, parsed.body))
626}
627
628/// Atomically write a markdown file from frontmatter + body: emit the
629/// frontmatter in canonical key order, then the body verbatim, via a
630/// temp-file-rename so a reader never sees a half-written file. Preserves the
631/// operator-edited body exactly as given.
632pub fn write_file(path: &Path, frontmatter: &Frontmatter, body: &str) -> Result<(), ParseError> {
633    let contents = render_file(frontmatter, body);
634
635    // One durable, atomic write for all primary data (see `crate::fsx`):
636    // temp-file + fsync + rename + parent-fsync. Content records are primary
637    // data, so they get the durable path (unlike the rebuildable index).
638    crate::fsx::write_atomic(path, contents.as_bytes())?;
639    Ok(())
640}
641
642/// Atomically create a markdown file from frontmatter + body, refusing with
643/// [`std::io::ErrorKind::AlreadyExists`] if the destination already exists.
644///
645/// This is the create-new sibling of [`write_file`]: same canonical rendering
646/// and durable temp-file path, but backed by [`crate::fsx::write_atomic_new`] so
647/// two concurrent creators for the same path cannot both succeed.
648pub fn write_file_new(
649    path: &Path,
650    frontmatter: &Frontmatter,
651    body: &str,
652) -> Result<(), ParseError> {
653    let contents = render_file(frontmatter, body);
654    crate::fsx::write_atomic_new(path, contents.as_bytes())?;
655    Ok(())
656}
657
658fn render_file(frontmatter: &Frontmatter, body: &str) -> String {
659    let yaml = frontmatter.to_yaml();
660    // `to_yaml` already terminates each block with a newline. Compose the file
661    // as: opening fence, frontmatter YAML, closing fence, then body verbatim.
662    let mut contents = String::with_capacity(yaml.len() + body.len() + 8);
663    contents.push_str("---\n");
664    contents.push_str(&yaml);
665    contents.push_str("---\n");
666    contents.push_str(body);
667    contents
668}
669
670/// Extract every wiki-link from a body (and inline frontmatter), returning the
671/// structured [`WikiLink`] stream with short-form / `.md`-extension flags and
672/// `(file, line, col)` locations set.
673pub fn extract_wiki_links(body: &str, file: &Path) -> Vec<WikiLink> {
674    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
675    let re = RE.get_or_init(|| {
676        // [[target]] or [[target|display]]; target/display exclude brackets and
677        // (for target) the `|` separator so nested forms don't over-match.
678        regex::Regex::new(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]*))?\]\]").expect("valid wiki-link regex")
679    });
680
681    let mut out = Vec::new();
682    for (line_idx, line) in body.lines().enumerate() {
683        for caps in re.captures_iter(line) {
684            let whole = caps.get(0).expect("group 0 always present");
685            let target = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
686            let display = caps.get(2).map(|m| m.as_str().to_string());
687            out.push(WikiLink {
688                is_full_path: target_is_full_path(&target),
689                has_md_extension: target_has_md_extension(&target),
690                target,
691                display,
692                location: (
693                    file.to_path_buf(),
694                    (line_idx as u32) + 1,
695                    char_column(line, whole.start()),
696                ),
697            });
698        }
699    }
700    out
701}
702
703/// Extract every standard markdown link `[text](url)` from a body into a
704/// separate stream, kept distinct from wiki-links.
705pub fn extract_markdown_links(body: &str, file: &Path) -> Vec<MarkdownLink> {
706    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
707    let re = RE.get_or_init(|| {
708        // [text](url). `text` excludes brackets so a wiki-link `[[x]]` (which
709        // has `]]`, not `](`) never matches; `url` excludes `)` and whitespace.
710        regex::Regex::new(r"\[([^\[\]]*)\]\(([^)\s]*)\)").expect("valid markdown-link regex")
711    });
712
713    let mut out = Vec::new();
714    for (line_idx, line) in body.lines().enumerate() {
715        for caps in re.captures_iter(line) {
716            let whole = caps.get(0).expect("group 0 always present");
717            out.push(MarkdownLink {
718                text: caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(),
719                url: caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(),
720                location: (
721                    file.to_path_buf(),
722                    (line_idx as u32) + 1,
723                    char_column(line, whole.start()),
724                ),
725            });
726        }
727    }
728    out
729}
730
731/// Detect the frontmatter wiki-link-list mis-encoding: a wiki-link *list*
732/// written so YAML parses it as nested sequences instead of a clean list of
733/// strings. Returns the offending keys so validate can emit
734/// `WIKI_LINK_FLOW_FORM_LIST`.
735///
736/// The subtlety is that `[[x]]` is YAML for "a list containing `[x]`", so the
737/// shapes nest:
738///
739/// - **Scalar inline** `company: [[records/x]]` → `Seq[ Seq[String] ]`
740///   (double-nested). This is the spec's scalar wiki-link form — NOT flagged.
741/// - **Flow list** `attendees: [[[a]], [[b]]]` → `Seq[ Seq[Seq[String]], … ]`
742///   (triple-nested). The list mis-encoding — flagged.
743/// - **Unquoted block list** (`- [[a]]` per line) → also triple-nested, so it
744///   is flagged too; the canonical list form must quote each item
745///   (`- "[[a]]"`), which parses to a clean `Seq[String, …]` and is NOT flagged.
746///
747/// So the discriminator is nesting depth: a *list* mis-encoding has at least one
748/// item that is itself a sequence-of-sequences, whereas a scalar inline link's
749/// single item is a sequence-of-scalars.
750pub fn detect_flow_form_link_lists(frontmatter_yaml: &str) -> Vec<String> {
751    let value: Value = match serde_norway::from_str(frontmatter_yaml) {
752        Ok(v) => v,
753        // Malformed YAML is FM_MALFORMED_YAML's job, not ours; report nothing.
754        Err(_) => return Vec::new(),
755    };
756    let Value::Mapping(map) = value else {
757        return Vec::new();
758    };
759
760    let mut out = Vec::new();
761    for (k, v) in &map {
762        if let Value::Sequence(items) = v {
763            // Triple-nesting: some outer item is a sequence that itself holds a
764            // sequence. Scalar inline `[[x]]` is only double-nested, so it
765            // never matches.
766            let is_link_list = items.iter().any(|item| match item {
767                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
768                _ => false,
769            });
770            if is_link_list {
771                if let Some(key) = k.as_str() {
772                    out.push(key.to_string());
773                }
774            }
775        }
776    }
777    out
778}
779
780/// Extract the `##`/`###` sections of a markdown body into a flat list with
781/// body slices.
782pub fn extract_sections(body: &str) -> Vec<Section> {
783    // Keep each line's start so we can slice the body verbatim (exact newlines).
784    let lines: Vec<&str> = body.split_inclusive('\n').collect();
785
786    // First pass: classify heading levels (0 = not a heading), honoring fenced
787    // code blocks so a `## x` inside a ``` fence is not treated as a heading.
788    let mut levels: Vec<u8> = Vec::with_capacity(lines.len());
789    let mut fence: Option<(u8, usize)> = None;
790    for line in &lines {
791        let content = line.trim_end_matches(['\n', '\r']);
792        if let Some(f) = fence {
793            if is_closing_fence(content, f) {
794                fence = None;
795            }
796            levels.push(0);
797            continue;
798        }
799        if let Some(opened) = opening_fence(content) {
800            fence = Some(opened);
801            levels.push(0);
802            continue;
803        }
804        levels.push(heading_level(content));
805    }
806
807    // Second pass: emit `##`+ headings; each section body runs from its heading
808    // line to the next heading at an equal-or-shallower level (exclusive).
809    let mut sections = Vec::new();
810    for (i, &lvl) in levels.iter().enumerate() {
811        if lvl < 2 {
812            continue;
813        }
814        let heading_line = lines[i].trim_end_matches(['\n', '\r']);
815        let heading = heading_text(heading_line, lvl);
816
817        let mut end = lines.len();
818        for (j, &other) in levels.iter().enumerate().skip(i + 1) {
819            if other != 0 && other <= lvl {
820                end = j;
821                break;
822            }
823        }
824
825        sections.push(Section {
826            heading,
827            level: lvl,
828            line: (i + 1) as u32,
829            body: lines[i..end].concat(),
830        });
831    }
832    sections
833}
834
835/// Parse a store's `DB.md` file into a [`Config`]: the `## Agent instructions`
836/// prose, `## Policies` (`### Frozen pages` + `### Ignored types`), and
837/// `## Schemas` (`### <type>` field-bullet blocks). Unrecognized sections are
838/// ignored; absent sections leave their [`Config`] fields at default.
839pub fn parse_db_md(text: &str, file: &Path) -> Result<Config, ParseError> {
840    // The structured sections live in the body (after frontmatter). DB.md must
841    // still start with a valid `---` block (`type: db-md`); if it's missing we
842    // surface MissingFrontmatter like any other file.
843    let parsed = split_frontmatter(text, file)?;
844    let _frontmatter = Frontmatter::parse(&parsed.frontmatter_yaml, file)?;
845    let sections = extract_sections(&parsed.body);
846
847    let mut config = Config::default();
848    // Track which H2 region each H3 belongs to as we walk the flat list.
849    let mut current_h2: Option<String> = None;
850
851    for section in &sections {
852        match section.level {
853            2 => {
854                let name = section.heading.trim().to_ascii_lowercase();
855                current_h2 = Some(name.clone());
856                if name == "agent instructions" {
857                    let prose = section_prose(&section.body);
858                    if !prose.is_empty() {
859                        config.agent_instructions = Some(prose);
860                    }
861                }
862            }
863            3 => {
864                let h2 = current_h2.as_deref().unwrap_or("");
865                let h3 = section.heading.trim().to_ascii_lowercase();
866                match (h2, h3.as_str()) {
867                    ("policies", "frozen pages") => {
868                        config.frozen_pages = bullet_lines(&section.body)
869                            .into_iter()
870                            .map(|b| PathBuf::from(extract_path_bullet(&b)))
871                            .collect();
872                    }
873                    ("policies", "ignored types") => {
874                        config.ignored_types = bullet_lines(&section.body)
875                            .into_iter()
876                            .flat_map(|b| extract_type_list_bullet(&b))
877                            .collect();
878                    }
879                    ("schemas", _) => {
880                        // The H3 heading text (as written) is the type name.
881                        let type_name = section.heading.trim().to_string();
882                        let mut schema = Schema::default();
883                        for b in bullet_lines(&section.body) {
884                            match parse_schema_bullet(&b) {
885                                SchemaBullet::Field(f) => schema.fields.push(f),
886                                SchemaBullet::Unique(k) if !k.is_empty() => {
887                                    schema.unique_keys.push(k)
888                                }
889                                SchemaBullet::SummaryTemplate(t) if !t.is_empty() => {
890                                    schema.summary_template = Some(t)
891                                }
892                                SchemaBullet::Shard(Some(b)) => schema.shard = Some(b),
893                                // Empty `unique:`/`summary_template:`, or a `shard:`
894                                // with an unrecognized value — ignored.
895                                SchemaBullet::Unique(_)
896                                | SchemaBullet::SummaryTemplate(_)
897                                | SchemaBullet::Shard(None) => {}
898                            }
899                        }
900                        config.schemas.insert(type_name, schema);
901                    }
902                    _ => {}
903                }
904            }
905            _ => {}
906        }
907    }
908
909    Ok(config)
910}
911
912/// One parsed bullet inside a `### <type>` schema block: an ordinary field, or a
913/// reserved directive (`unique:` / `summary_template:` / `shard:`). The names
914/// `unique`, `summary_template`, and `shard` are reserved and cannot be used as
915/// field names.
916#[derive(Debug)]
917enum SchemaBullet {
918    /// An ordinary `- <name> (<modifiers>)` field.
919    Field(FieldSpec),
920    /// `- unique: <field>[, <field> …]` — a (possibly compound) uniqueness key.
921    Unique(Vec<String>),
922    /// `- summary_template: <template>` — the default-`summary` pattern.
923    SummaryTemplate(String),
924    /// `- shard: by-date | flat` — date-shard records of this type, or keep them
925    /// flat. `None` = an unrecognized value, ignored like an unknown modifier.
926    Shard(Option<bool>),
927}
928
929/// Classify one `## Schemas` bullet as a directive or a field. The directive
930/// forms are `- unique: a, b, …` and `- summary_template: …`; the keyword check
931/// guards against false positives — a field like `- status (enum: a, b)` has a
932/// `(` before any `:`, so its head isn't a bare reserved keyword and it parses
933/// as a [`FieldSpec`].
934fn parse_schema_bullet(bullet_line: &str) -> SchemaBullet {
935    let line = bullet_line.trim();
936    let line = line
937        .strip_prefix("- ")
938        .or_else(|| line.strip_prefix("* "))
939        .or_else(|| line.strip_prefix("+ "))
940        .or_else(|| line.strip_prefix('-'))
941        .unwrap_or(line)
942        .trim();
943
944    if let Some((head, rest)) = line.split_once(':') {
945        match head.trim().to_ascii_lowercase().as_str() {
946            "unique" => {
947                let fields = rest
948                    .split(',')
949                    .map(|f| f.trim().to_string())
950                    .filter(|f| !f.is_empty())
951                    .collect();
952                return SchemaBullet::Unique(fields);
953            }
954            "summary_template" => {
955                return SchemaBullet::SummaryTemplate(rest.trim().to_string());
956            }
957            "shard" => {
958                // `by-date` (synonyms: date/sharded/true) enables date-sharding;
959                // `flat` (none/false) forces flat; anything else is ignored.
960                let v = match rest.trim().to_ascii_lowercase().as_str() {
961                    "by-date" | "date" | "sharded" | "true" => Some(true),
962                    "flat" | "none" | "false" => Some(false),
963                    _ => None,
964                };
965                return SchemaBullet::Shard(v);
966            }
967            _ => {}
968        }
969    }
970
971    SchemaBullet::Field(parse_field_spec(bullet_line))
972}
973
974/// Parse a single `## Schemas` field-bullet line — `- <name> (<modifiers>)` —
975/// into a [`FieldSpec`], capturing recognized modifiers and stashing the rest
976/// in [`FieldSpec::unknown_modifiers`].
977pub fn parse_field_spec(bullet_line: &str) -> FieldSpec {
978    // Strip the leading bullet marker (`- ` / `* ` / `+ `) and surrounding ws.
979    let line = bullet_line.trim();
980    let line = line
981        .strip_prefix("- ")
982        .or_else(|| line.strip_prefix("* "))
983        .or_else(|| line.strip_prefix("+ "))
984        .or_else(|| line.strip_prefix('-'))
985        .unwrap_or(line)
986        .trim();
987
988    // Split `<name> (<modifiers>)`. A bullet without parens is a free-form
989    // optional field of any shape — name only, no modifiers.
990    let (name, modifiers) = match line.find('(') {
991        Some(open) => {
992            let name = line[..open].trim().to_string();
993            let after = &line[open + 1..];
994            let mods = match after.rfind(')') {
995                Some(close) => &after[..close],
996                None => after, // tolerate a missing close paren
997            };
998            (name, mods.trim())
999        }
1000        None => (line.to_string(), ""),
1001    };
1002
1003    let mut spec = FieldSpec {
1004        name,
1005        ..FieldSpec::default()
1006    };
1007
1008    if modifiers.is_empty() {
1009        return spec;
1010    }
1011
1012    // Modifiers are comma-separated. `enum:` is special: because its own value
1013    // list contains commas, it must be last and swallows the remainder.
1014    let raw: Vec<&str> = modifiers.split(',').collect();
1015    let mut i = 0;
1016    while i < raw.len() {
1017        let token = raw[i].trim();
1018        if token.is_empty() {
1019            i += 1;
1020            continue;
1021        }
1022        let lower = token.to_ascii_lowercase();
1023
1024        if lower == "required" {
1025            spec.required = true;
1026        } else if let Some(shape) = shape_from_str(&lower) {
1027            spec.shape = Some(shape);
1028        } else if let Some(rest) = lower.strip_prefix("link to ") {
1029            // The trailing slash is required in the source; store the prefix
1030            // without it so `Path::starts_with` comparisons are clean.
1031            let prefix = token["link to ".len()..].trim().trim_end_matches('/');
1032            let _ = rest; // lowercase form only used for the keyword match
1033            spec.link_prefix = Some(PathBuf::from(prefix));
1034        } else if let Some(_rest) = lower.strip_prefix("default ") {
1035            // Value is everything after the keyword on this comma-token,
1036            // preserving original case.
1037            let value = token["default ".len()..].trim().to_string();
1038            spec.default = Some(Value::String(value));
1039        } else if lower == "enum" {
1040            // Bare `enum` keyword (`enum, open, closed`): the values are the
1041            // REMAINING tokens — the keyword itself must not leak in as a value.
1042            let values: Vec<String> = raw[i + 1..]
1043                .iter()
1044                .map(|v| v.trim().to_string())
1045                .filter(|v| !v.is_empty())
1046                .collect();
1047            spec.enum_values = Some(values);
1048            break; // enum consumed the rest of the line
1049        } else if lower.starts_with("enum:") {
1050            // `enum: open, closed` form: rejoin this token and the rest, then
1051            // drop everything up to and including the `:`.
1052            let mut joined = raw[i..].join(",");
1053            if let Some(colon) = joined.find(':') {
1054                joined = joined[colon + 1..].to_string();
1055            }
1056            let values: Vec<String> = joined
1057                .split(',')
1058                .map(|v| v.trim().to_string())
1059                .filter(|v| !v.is_empty())
1060                .collect();
1061            spec.enum_values = Some(values);
1062            break; // enum consumed the rest of the line
1063        } else {
1064            // Unrecognized modifier — captured verbatim, surfaced as Info.
1065            spec.unknown_modifiers.push(token.to_string());
1066        }
1067        i += 1;
1068    }
1069
1070    spec
1071}
1072
1073// ── Private helpers ─────────────────────────────────────────────────────────
1074
1075/// Parse a frontmatter timestamp value into a `DateTime<FixedOffset>`. A `null`
1076/// is treated as absent; anything else must be an RFC3339 string.
1077fn parse_timestamp(
1078    value: &Value,
1079    key: &str,
1080    file: &Path,
1081) -> Result<Option<DateTime<FixedOffset>>, ParseError> {
1082    match value {
1083        Value::Null => Ok(None),
1084        Value::String(s) => parse_rfc3339(s, key, file).map(Some),
1085        other => Err(ParseError::BadTimestamp {
1086            file: file.to_path_buf(),
1087            key: key.to_string(),
1088            value: format!("{other:?}"),
1089        }),
1090    }
1091}
1092
1093/// Parse an RFC3339 timestamp string, mapping failure to [`ParseError::BadTimestamp`].
1094fn parse_rfc3339(s: &str, key: &str, file: &Path) -> Result<DateTime<FixedOffset>, ParseError> {
1095    DateTime::parse_from_rfc3339(s.trim()).map_err(|_| ParseError::BadTimestamp {
1096        file: file.to_path_buf(),
1097        key: key.to_string(),
1098        value: s.to_string(),
1099    })
1100}
1101
1102/// Coerce a YAML scalar value to its string form for the universal-contract
1103/// fields (`type`/`id`/`summary`/`status`). Mirrors `validate::scalar_string`
1104/// and `store::yaml_scalar_string` so the four modules agree on one coercion
1105/// rule: a bare numeric/bool scalar (`id: 100`, `summary: 2026`, `status: 0`)
1106/// is preserved as its string form rather than being read as None and silently
1107/// dropped on the next `to_yaml` re-emit. Returns `None` only for genuinely
1108/// non-scalar values (sequences, mappings, null), which were never a valid
1109/// shape for these fields.
1110fn scalar_string(value: &Value) -> Option<String> {
1111    match value {
1112        Value::String(s) => Some(s.clone()),
1113        Value::Number(n) => Some(n.to_string()),
1114        Value::Bool(b) => Some(b.to_string()),
1115        _ => None,
1116    }
1117}
1118
1119/// Read a `tags` value into a flat `Vec<String>`. Accepts a sequence of scalars
1120/// (the canonical form) or a single scalar (coerced to a one-element list).
1121fn parse_tags(value: &Value) -> Vec<String> {
1122    match value {
1123        Value::Sequence(items) => items
1124            .iter()
1125            .filter_map(|v| match v {
1126                Value::String(s) => Some(s.clone()),
1127                Value::Number(n) => Some(n.to_string()),
1128                Value::Bool(b) => Some(b.to_string()),
1129                _ => None,
1130            })
1131            .collect(),
1132        Value::String(s) => vec![s.clone()],
1133        _ => Vec::new(),
1134    }
1135}
1136
1137/// Parse a single `[[target|display]]` string into a [`WikiLink`] with no
1138/// location, or `None` if the string is not a bare wiki-link. Used for
1139/// frontmatter-valued links where there is no body position to report.
1140fn parse_wiki_link_str(s: &str) -> Option<WikiLink> {
1141    let s = s.trim();
1142    let inner = s.strip_prefix("[[")?.strip_suffix("]]")?;
1143    // Reject anything with further brackets (e.g. the nested flow-form item),
1144    // which is not a clean single wiki-link.
1145    if inner.contains('[') || inner.contains(']') {
1146        return None;
1147    }
1148    let (target, display) = match inner.split_once('|') {
1149        Some((t, d)) => (t.to_string(), Some(d.to_string())),
1150        None => (inner.to_string(), None),
1151    };
1152    Some(WikiLink {
1153        is_full_path: target_is_full_path(&target),
1154        has_md_extension: target_has_md_extension(&target),
1155        target,
1156        display,
1157        location: (PathBuf::new(), 0, 0),
1158    })
1159}
1160
1161/// Extract every wiki-link from a single frontmatter field value, accepting the
1162/// two canonical forms the spec defines (SPEC § Linking):
1163///
1164/// - a **scalar** wiki-link field, in either the quoted (`f: "[[x]]"`) or the
1165///   canonical unquoted inline (`f: [[x]]`) form, and
1166/// - a **list** field whose items are quoted wiki-link strings
1167///   (`- "[[x]]"`).
1168///
1169/// YAML eats the brackets of an unquoted `[[x]]`, leaving a flow-list-in-a-list,
1170/// so the parsed [`Value`] shapes are not what one would naively expect:
1171///
1172/// | source                         | parsed `Value`                     | here |
1173/// |--------------------------------|------------------------------------|------|
1174/// | `f: "[[x]]"`       (quoted)    | `String("[[x]]")`                  | link |
1175/// | `f: [[x]]`         (unquoted)  | `Seq[ Seq[String("x")] ]`          | link |
1176/// | `f:`\n`  - "[[x]]"`(quoted)    | `Seq[ String("[[x]]"), … ]`        | link |
1177/// | `f:`\n`  - [[x]]`  (unquoted)  | `Seq[ Seq[Seq[String("x")]], … ]`  | —    |
1178///
1179/// The last row — an *unquoted list* — parses identically to the flow-form list
1180/// `f: [[a], [b]]` and is a mis-encoding the canonical writer never emits;
1181/// `dbmd validate` reports it as `WIKI_LINK_FLOW_FORM_LIST` (see
1182/// [`detect_flow_form_link_lists`]). It is deliberately NOT surfaced here, so an
1183/// edge enumerator only ever sees the valid canonical forms.
1184///
1185/// The unquoted scalar (`Seq[Seq[String]]`, one element) is told apart from a
1186/// plain one-item flow list (`f: [x]` → `Seq[String]`, one fewer nesting level)
1187/// by [`unquoted_inline_link`] requiring its argument to be a `Sequence`.
1188fn links_in_field_value(value: &Value) -> Vec<WikiLink> {
1189    // Quoted scalar: `field: "[[x]]"`.
1190    if let Value::String(s) = value {
1191        return parse_wiki_link_str(s).into_iter().collect();
1192    }
1193    let Value::Sequence(items) = value else {
1194        return Vec::new();
1195    };
1196    // Unquoted scalar inline form `field: [[x]]` → `Seq[ Seq[String(x)] ]`.
1197    // (A quoted single-item list `["[[x]]"]` is `Seq[String]`, so its lone item
1198    // is a `String`, not a `Sequence`, and falls through to the list path below.)
1199    if items.len() == 1 {
1200        if let Some(link) = unquoted_inline_link(&items[0]) {
1201            return vec![link];
1202        }
1203    }
1204    // Otherwise a list of quoted wiki-link strings; non-string items (the
1205    // unquoted-list mis-encoding) are left for validate to flag.
1206    items
1207        .iter()
1208        .filter_map(|item| parse_wiki_link_str(item.as_str()?))
1209        .collect()
1210}
1211
1212/// Canonicalize one `extra` frontmatter value for emission by [`Frontmatter::to_yaml`].
1213///
1214/// The read path ([`Frontmatter::parse`]) stores every unknown key's raw parsed
1215/// [`Value`] verbatim, so a SPEC-canonical *unquoted* inline scalar wiki-link
1216/// (`company: [[records/companies/northstar]]`) lands in `extra` as the nested
1217/// shape YAML produces for it — `Seq[ Seq[String("records/companies/northstar")] ]`.
1218/// Re-emitting that verbatim yields the block sequence
1219///
1220/// ```text
1221/// company:
1222/// - - records/companies/northstar
1223/// ```
1224///
1225/// which has lost the `[[ ]]` brackets entirely: the link is destroyed, and every
1226/// reader (validate, graph, backlinks) stops seeing the edge. This normalizes such
1227/// a value back into the canonical emitted form before it is written:
1228///
1229/// - a **scalar** wiki-link (quoted `String("[[x]]")` or unquoted `Seq[Seq[String]]`,
1230///   one element) → a quoted scalar `Value::String("[[x]]")`, which serde_norway emits
1231///   inline as `'[[x]]'` — the form the finding confirms survives a round-trip and
1232///   that [`links_in_field_value`] reads back as the same scalar link;
1233/// - a **list** of wiki-links (in any spelling [`links_in_field_value`] accepts) →
1234///   a block `Value::Sequence` of quoted-link strings (`- "[[x]]"`), matching the
1235///   `set` write-in path and the canonical list form;
1236/// - everything else → returned verbatim (the common no-op for non-link values).
1237///
1238/// `|display` is preserved in both link branches. This is the single point that
1239/// keeps all three curator-loop writers (`format`, `fm set`, `link`) from
1240/// corrupting a pre-existing canonical link, since they all funnel through
1241/// `to_yaml`.
1242fn canonicalize_extra_value(value: &Value) -> Value {
1243    match value {
1244        // Scalar wiki-link, quoted form: `field: "[[x]]"` → `String("[[x]]")`.
1245        // Re-emit as a quoted scalar so it stays a string (never the brackets-as-
1246        // YAML nested sequence). Non-link strings are returned untouched.
1247        Value::String(s) => match parse_wiki_link_str(s) {
1248            Some(link) => Value::String(wiki_link_literal(&link)),
1249            None => value.clone(),
1250        },
1251        Value::Sequence(items) => {
1252            // Scalar wiki-link, unquoted inline form: `field: [[x]]` parses to a
1253            // one-element `Seq[ Seq[String(x)] ]`. Collapse back to the quoted
1254            // scalar string so the link is preserved rather than block-emitted.
1255            if items.len() == 1 {
1256                if let Some(link) = unquoted_inline_link(&items[0]) {
1257                    return Value::String(wiki_link_literal(&link));
1258                }
1259            }
1260            // List of wiki-links: re-emit as a block sequence of quoted-link
1261            // strings, the canonical list form `to_yaml` renders block-style and
1262            // `links_in_field_value` accepts. Only canonicalize when *every* item
1263            // is a clean single wiki-link; a list with any non-link item is left
1264            // verbatim so unrelated sequences (and the unquoted-list mis-encoding
1265            // validate flags) are untouched.
1266            let mut links = Vec::with_capacity(items.len());
1267            for item in items {
1268                match link_from_flow_list_item(item) {
1269                    Some(link) => links.push(link),
1270                    None => return value.clone(),
1271                }
1272            }
1273            if links.is_empty() {
1274                return value.clone();
1275            }
1276            Value::Sequence(
1277                links
1278                    .iter()
1279                    .map(|l| Value::String(wiki_link_literal(l)))
1280                    .collect(),
1281            )
1282        }
1283        // Mappings, scalars other than strings, nulls: nothing to canonicalize.
1284        _ => value.clone(),
1285    }
1286}
1287
1288/// Render a [`WikiLink`] back to its `[[target]]` / `[[target|display]]` literal,
1289/// the inner form the canonical writer emits and `links_in_field_value` accepts.
1290fn wiki_link_literal(link: &WikiLink) -> String {
1291    match &link.display {
1292        Some(d) => format!("[[{}|{}]]", link.target, d),
1293        None => format!("[[{}]]", link.target),
1294    }
1295}
1296
1297/// Recognize the inner token of an unquoted scalar `[[x]]`: after YAML strips the
1298/// outer brackets, the inner `[x]` is a single-element sequence `Seq[String(x)]`.
1299/// Reconstructs `[[x]]` (preserving any `|display`) and parses it, or returns
1300/// `None` when `v` is not that shape. Requiring a `Sequence` here is what keeps a
1301/// plain one-item flow list (`field: [x]` → `Seq[String]`, not `Seq[Seq[String]]`)
1302/// from being mistaken for a wiki-link.
1303fn unquoted_inline_link(v: &Value) -> Option<WikiLink> {
1304    let Value::Sequence(items) = v else {
1305        return None;
1306    };
1307    if items.len() != 1 {
1308        return None;
1309    }
1310    let s = items[0].as_str()?;
1311    // A clean unquoted wiki-link has no further brackets inside it.
1312    if s.contains('[') || s.contains(']') {
1313        return None;
1314    }
1315    parse_wiki_link_str(&format!("[[{s}]]"))
1316}
1317
1318/// Decide whether a `dbmd fm set` / `--fm` value string is a **list of
1319/// wiki-links** that should be stored as a YAML block sequence, returning the
1320/// canonical `Value::Sequence` of quoted-link strings when so.
1321///
1322/// The value path of every write surface stringifies its argument; without this
1323/// a required list-of-links field (`meeting.attendees`) was unwritable in valid
1324/// form — passing `[[[a]], [[b]]]` stored a single scalar string that mis-parses
1325/// and trips `WIKI_LINK_FLOW_FORM_LIST` / `WIKI_LINK_BROKEN`. This recognizes the
1326/// two list spellings an agent naturally types and normalizes both to the block
1327/// form the canonical writer emits and `dbmd validate` accepts:
1328///
1329/// - flow list of quoted links — `["[[a]]", "[[b]]"]`
1330/// - flow list of unquoted links — `[[[a]], [[b]]]` (YAML: `Seq[Seq[String], …]`)
1331///
1332/// Returns `None` (⇒ caller stores a verbatim scalar string) for everything that
1333/// is not unambiguously a list of clean wiki-links — plain text, a single inline
1334/// `[[x]]` (YAML reads it as a one-item `Seq[Seq[String]]`, kept scalar so it
1335/// renders inline), an empty list, or a list with any non-link item. A single
1336/// link must stay scalar; only genuine multi-item-or-explicit lists become
1337/// sequences, matching `links_in_field_value`'s acceptance rule so writer and
1338/// validator never disagree.
1339fn parse_link_list_value(value: &str) -> Option<Value> {
1340    let trimmed = value.trim();
1341    // Only a YAML *flow sequence* literal is a list candidate; anything not
1342    // wrapped in `[ … ]` is a scalar (a bare `[[x]]` is wrapped, and handled by
1343    // the single-inline-link guard below).
1344    if !(trimmed.starts_with('[') && trimmed.ends_with(']')) {
1345        return None;
1346    }
1347    let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(trimmed) else {
1348        return None;
1349    };
1350    // A single inline `[[x]]` parses to `Seq[ Seq[String(x)] ]` (one item, itself
1351    // a sequence) — that is the unquoted *scalar* form, not a list. Keep it scalar
1352    // so it round-trips to the inline `field: [[x]]` rather than a one-item block
1353    // list. `links_in_field_value` reads it back as a scalar link either way.
1354    if items.len() == 1 && unquoted_inline_link(&items[0]).is_some() {
1355        return None;
1356    }
1357    // Every item must resolve to exactly one clean wiki-link, in any of the flow
1358    // spellings an agent types (see [`link_from_flow_list_item`]).
1359    let mut links = Vec::with_capacity(items.len());
1360    for item in &items {
1361        links.push(link_from_flow_list_item(item)?);
1362    }
1363    if links.is_empty() {
1364        return None;
1365    }
1366    // Normalize to a block sequence of quoted-link strings — the form `to_yaml`
1367    // renders block-style and `links_in_field_value` accepts. `|display` is
1368    // preserved.
1369    let normalized = links
1370        .iter()
1371        .map(|l| Value::String(wiki_link_literal(l)))
1372        .collect();
1373    Some(Value::Sequence(normalized))
1374}
1375
1376/// Recognize one clean wiki-link from a single **item** of a YAML flow sequence,
1377/// across the spellings an agent types for a list. After top-level flow parsing,
1378/// a list item arrives in one of:
1379///
1380/// - quoted — `"[[x]]"` ⇒ `String("[[x]]")`
1381/// - unquoted in a flow list — `[[x]]` inside `[…]` ⇒ `Seq[ Seq[String(x)] ]`
1382///   (one level deeper than a bare unquoted scalar, because the surrounding list
1383///   adds a wrapper); unwrap the single-element wrapper, then read the inline
1384///   `Seq[String(x)]` with [`unquoted_inline_link`].
1385///
1386/// Returns `None` for any item that is not exactly one clean wiki-link, so the
1387/// caller falls back to a scalar string and never fabricates a partial list.
1388fn link_from_flow_list_item(item: &Value) -> Option<WikiLink> {
1389    match item {
1390        Value::String(s) => parse_wiki_link_str(s),
1391        Value::Sequence(inner) => {
1392            // Unquoted list item `[[x]]` → `Seq[ Seq[String(x)] ]`: peel the lone
1393            // wrapper to expose the inline-link shape.
1394            if inner.len() == 1 {
1395                if let Some(link) = unquoted_inline_link(&inner[0]) {
1396                    return Some(link);
1397                }
1398            }
1399            // Defensive: also accept the inline-link shape directly.
1400            unquoted_inline_link(item)
1401        }
1402        _ => None,
1403    }
1404}
1405
1406/// A target is a full store-relative path when its first path segment is one of
1407/// the three canonical layer dirs and at least one `/` separator follows. A
1408/// trailing `.md` does not affect this classification.
1409fn target_is_full_path(target: &str) -> bool {
1410    let target = target.trim();
1411    match target.split_once('/') {
1412        Some((head, _rest)) => LAYER_DIRS.contains(&head),
1413        None => false,
1414    }
1415}
1416
/// Report whether the trimmed `target` ends in `.md` (the condition behind the
/// validate warning `WIKI_LINK_HAS_EXTENSION`).
fn target_has_md_extension(target: &str) -> bool {
    target.trim().strip_suffix(".md").is_some()
}
1422
/// 1-based character (Unicode scalar) column of `byte_offset` within `line`.
///
/// The column is one more than the number of characters that start before
/// `byte_offset`. The offset is treated leniently so a slightly-off position
/// can never panic this helper:
///
/// - an offset past the end of `line` is clamped to `line.len()`;
/// - an offset that falls inside a multi-byte character is moved back to the
///   start of that character, so the result is that character's column.
///
/// The result saturates at `u32::MAX` instead of wrapping.
fn char_column(line: &str, byte_offset: usize) -> u32 {
    let mut end = byte_offset.min(line.len());
    // Offset 0 is always a char boundary, so this walk-back terminates.
    while !line.is_char_boundary(end) {
        end -= 1;
    }
    let preceding = line[..end].chars().count();
    // Cap before the narrowing cast so the `+ 1` below cannot overflow.
    let capped = preceding.min((u32::MAX - 1) as usize);
    capped as u32 + 1
}
1427
1428/// Map a lowercase shape keyword to its [`Shape`].
1429fn shape_from_str(s: &str) -> Option<Shape> {
1430    match s {
1431        "string" => Some(Shape::String),
1432        "int" => Some(Shape::Int),
1433        "bool" => Some(Shape::Bool),
1434        "date" => Some(Shape::Date),
1435        "email" => Some(Shape::Email),
1436        "currency" => Some(Shape::Currency),
1437        "url" => Some(Shape::Url),
1438        _ => None,
1439    }
1440}
1441
/// ATX heading level of `line`: the length of its leading `#` run, or 0 when
/// the line is not a heading.
///
/// A heading may be indented by at most three spaces, its `#` run must be one
/// to six characters long, and the run must be followed by a space, a tab, or
/// the end of the line.
fn heading_level(line: &str) -> u8 {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return 0;
    }
    let tail = unindented.trim_start_matches('#');
    let run = unindented.len() - tail.len();
    let run_in_range = (1..=6).contains(&run);
    let properly_delimited = tail.is_empty() || tail.starts_with([' ', '\t']);
    if run_in_range && properly_delimited {
        run as u8
    } else {
        0
    }
}
1462
/// The heading text after the opening `#` run, trimmed, with an ATX closing
/// `#` sequence removed (`## Title ##` → `Title`).
///
/// `level` is the length of the opening `#` run, as reported by
/// `heading_level` for the same line.
///
/// Following CommonMark, a trailing `#` run only counts as a closing sequence
/// when it is preceded by a space or tab, or when it is the entire content.
/// A `#` glued to the text is part of the title, so `## C#` yields `C#`
/// rather than `C`.
///
/// If `level` does not describe `line` (the slice start is out of range or not
/// on a character boundary) the text is empty rather than a panic.
fn heading_text(line: &str, level: u8) -> String {
    let indent = line.len() - line.trim_start_matches(' ').len();
    let content = line
        .get(indent + usize::from(level)..)
        .unwrap_or("")
        .trim();
    let without_run = content.trim_end_matches('#');
    if without_run.len() == content.len() {
        // No trailing `#` at all.
        return content.to_string();
    }
    let is_closing_sequence = without_run.is_empty() || without_run.ends_with([' ', '\t']);
    if is_closing_sequence {
        without_run.trim_end().to_string()
    } else {
        // The `#` run is attached to the text: keep it as part of the title.
        content.to_string()
    }
}
1476
/// Detect the opening line of a fenced code block.
///
/// Returns `Some((fence byte, run length))` when `line`, after at most three
/// leading spaces, starts with a run of three or more backticks or tildes.
/// For a backtick fence the remainder of the line (the info string) must not
/// contain another backtick. Returns `None` otherwise.
fn opening_fence(line: &str) -> Option<(u8, usize)> {
    let body = line.trim_start_matches(' ');
    if line.len() - body.len() > 3 {
        return None;
    }
    let marker = match body.as_bytes().first() {
        Some(&b) if b == b'`' || b == b'~' => b,
        _ => return None,
    };
    let info = body.trim_start_matches(char::from(marker));
    let run_len = body.len() - info.len();
    if run_len < 3 {
        return None;
    }
    // Backtick fences reject an info string that itself holds a backtick.
    let backtick_in_info = marker == b'`' && info.contains('`');
    if backtick_in_info {
        None
    } else {
        Some((marker, run_len))
    }
}
1498
/// Decide whether `line` closes the fence described by `fence`, given as
/// `(fence byte, opening run length)`.
///
/// The line closes the fence when, after at most three leading spaces, it
/// starts with a run of the same fence character at least as long as the
/// opening run, and only whitespace follows that run.
fn is_closing_fence(line: &str, fence: (u8, usize)) -> bool {
    let (marker, min_run) = fence;
    let body = line.trim_start_matches(' ');
    if line.len() - body.len() > 3 {
        return false;
    }
    let tail = body.trim_start_matches(char::from(marker));
    let run_len = body.len() - tail.len();
    run_len >= min_run && tail.trim().is_empty()
}
1514
/// Prose of a section: the text following the first line (the heading) of
/// `section_body`, trimmed at both ends. A body with no newline has no prose
/// and yields an empty string.
fn section_prose(section_body: &str) -> String {
    section_body
        .split_once('\n')
        .map(|(_, after_heading)| after_heading.trim().to_string())
        .unwrap_or_default()
}
1522
/// Collect the bullet lines of a section body.
///
/// The first line (the heading) is skipped. Every remaining line is trimmed at
/// both ends and kept when it begins with a `- `, `* ` or `+ ` marker; the
/// returned strings are those trimmed lines, marker included.
fn bullet_lines(section_body: &str) -> Vec<String> {
    const MARKERS: [&str; 3] = ["- ", "* ", "+ "];
    let mut bullets = Vec::new();
    for raw in section_body.lines().skip(1) {
        let line = raw.trim();
        if MARKERS.iter().any(|marker| line.starts_with(*marker)) {
            bullets.push(line.to_string());
        }
    }
    bullets
}
1534
/// Drop a trailing comment from a bullet's content.
///
/// The content is cut at the earliest occurrence of any comment separator —
/// ` — ` (em dash), ` -- `, or ` – ` (en dash) — and the part before it is
/// returned trimmed. Content without a separator is returned trimmed in full.
fn strip_bullet_comment(content: &str) -> &str {
    let cut = [" — ", " -- ", " – "]
        .iter()
        .filter_map(|separator| content.find(*separator))
        .min()
        .unwrap_or(content.len());
    content[..cut].trim()
}
1546
/// Content of a bullet line without its marker.
///
/// The line is trimmed, a single leading `- `, `* ` or `+ ` marker is removed
/// if present, and the remainder is trimmed again. A line without a marker is
/// returned trimmed and otherwise unchanged.
fn bullet_content(bullet: &str) -> &str {
    let line = bullet.trim();
    for marker in ["- ", "* ", "+ "] {
        if let Some(rest) = line.strip_prefix(marker) {
            return rest.trim();
        }
    }
    line
}
1556
1557/// Extract a store-relative path from a Frozen-pages bullet. The path may be
1558/// wrapped in backticks and followed by an em-dash comment.
1559fn extract_path_bullet(bullet: &str) -> String {
1560    let content = bullet_content(bullet);
1561    // Prefer a backtick-delimited span if present.
1562    if let Some(start) = content.find('`') {
1563        if let Some(end_rel) = content[start + 1..].find('`') {
1564            return content[start + 1..start + 1 + end_rel].trim().to_string();
1565        }
1566    }
1567    // Otherwise take the text up to a comment separator, stripping quotes.
1568    strip_bullet_comment(content)
1569        .trim_matches('"')
1570        .trim_matches('\'')
1571        .trim()
1572        .to_string()
1573}
1574
1575/// Extract a comma-separated type list from an Ignored-types bullet, stripping
1576/// backticks/quotes and any trailing em-dash comment.
1577fn extract_type_list_bullet(bullet: &str) -> Vec<String> {
1578    let content = strip_bullet_comment(bullet_content(bullet));
1579    content
1580        .split(',')
1581        .map(|t| {
1582            t.trim()
1583                .trim_matches('`')
1584                .trim_matches('"')
1585                .trim_matches('\'')
1586                .trim()
1587                .to_string()
1588        })
1589        .filter(|t| !t.is_empty())
1590        .collect()
1591}
1592
1593#[cfg(test)]
1594mod tests {
1595    use super::*;
1596    use std::path::Path;
1597    use tempfile::tempdir;
1598
1599    // ── Config::frozen_match (the single write-surface policy matcher) ───────
1600
1601    #[test]
1602    fn frozen_match_is_md_insensitive_both_directions() {
1603        // A policy entry stored WITHOUT `.md` (the natural extensionless
1604        // spelling `parse_db_md` keeps verbatim) must still match a `.md`
1605        // write target — the regression every write surface had.
1606        let cfg = Config {
1607            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1608            ..Config::default()
1609        };
1610        assert_eq!(
1611            cfg.frozen_match(Path::new("records/decisions/q1.md")),
1612            Some(PathBuf::from("records/decisions/q1")),
1613            "extensionless policy entry must freeze the .md file"
1614        );
1615        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1616
1617        // The symmetric case: a policy entry WITH `.md` matches a bare target.
1618        let cfg = Config {
1619            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1620            ..Config::default()
1621        };
1622        assert_eq!(
1623            cfg.frozen_match(Path::new("records/decisions/q1")),
1624            Some(PathBuf::from("records/decisions/q1.md")),
1625        );
1626        // And the same-spelling cases still match.
1627        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1628    }
1629
1630    #[test]
1631    fn frozen_match_drops_leading_dot_slash() {
1632        let cfg = Config {
1633            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1634            ..Config::default()
1635        };
1636        assert!(cfg.is_frozen(Path::new("./records/decisions/q1.md")));
1637        assert!(cfg.is_frozen(Path::new("./records/decisions/q1")));
1638    }
1639
1640    #[test]
1641    fn frozen_match_returns_none_for_unlisted_and_prefix_paths() {
1642        let cfg = Config {
1643            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1644            ..Config::default()
1645        };
1646        assert!(cfg
1647            .frozen_match(Path::new("records/decisions/q2.md"))
1648            .is_none());
1649        // A prefix is not a match: `q1` must not freeze `q1-draft`.
1650        assert!(cfg
1651            .frozen_match(Path::new("records/decisions/q1-draft.md"))
1652            .is_none());
1653        assert!(!cfg.is_frozen(Path::new("records/decisions/q11.md")));
1654    }
1655
1656    // ── split_frontmatter ───────────────────────────────────────────────────
1657
1658    #[test]
1659    fn split_frontmatter_separates_yaml_and_verbatim_body() {
1660        let text = "---\ntype: contact\nsummary: x\n---\n# Heading\n\nBody line.\n";
1661        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1662        assert_eq!(p.frontmatter_yaml, "type: contact\nsummary: x\n");
1663        // Body is everything after the closing fence's newline, byte-for-byte.
1664        assert_eq!(p.body, "# Heading\n\nBody line.\n");
1665    }
1666
1667    #[test]
1668    fn split_frontmatter_preserves_body_without_trailing_newline() {
1669        let text = "---\ntype: x\n---\nno trailing newline";
1670        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1671        assert_eq!(p.body, "no trailing newline");
1672    }
1673
1674    #[test]
1675    fn split_frontmatter_empty_body_when_nothing_after_fence() {
1676        let text = "---\ntype: x\n---\n";
1677        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1678        assert_eq!(p.body, "");
1679    }
1680
1681    #[test]
1682    fn split_frontmatter_missing_opening_fence_errors() {
1683        let text = "# No frontmatter here\ntype: x\n";
1684        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1685        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1686    }
1687
1688    #[test]
1689    fn split_frontmatter_leading_content_before_fence_rejected() {
1690        // The opening fence must be the very first line; a blank line first is
1691        // not allowed.
1692        let text = "\n---\ntype: x\n---\nbody";
1693        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1694        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1695    }
1696
1697    #[test]
1698    fn split_frontmatter_unterminated_block_errors() {
1699        let text = "---\ntype: x\nsummary: y\n";
1700        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1701        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1702    }
1703
1704    // ── Frontmatter::parse ───────────────────────────────────────────────────
1705
1706    #[test]
1707    fn parse_populates_typed_fields_and_routes_unknowns_to_extra() {
1708        let yaml = "type: contact\nid: sarah-chen\nsummary: Director of Ops\nstatus: active\ntags: [vip, renewal]\nemail: sarah@northstar.io\nrole: Director";
1709        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1710        assert_eq!(fm.type_.as_deref(), Some("contact"));
1711        assert_eq!(fm.id.as_deref(), Some("sarah-chen"));
1712        assert_eq!(fm.summary.as_deref(), Some("Director of Ops"));
1713        assert_eq!(fm.status.as_deref(), Some("active"));
1714        assert_eq!(fm.tags, vec!["vip".to_string(), "renewal".to_string()]);
1715        // Type-specific fields are NOT promoted to typed slots.
1716        assert!(fm.type_.is_some() && !fm.extra.contains_key("type"));
1717        assert!(!fm.extra.contains_key("tags"));
1718        assert_eq!(
1719            fm.extra.get("email").and_then(|v| v.as_str()),
1720            Some("sarah@northstar.io")
1721        );
1722        assert_eq!(
1723            fm.extra.get("role").and_then(|v| v.as_str()),
1724            Some("Director")
1725        );
1726    }
1727
1728    #[test]
1729    fn parse_reads_rfc3339_timestamps() {
1730        let yaml =
1731            "type: email\ncreated: 2026-05-27T08:00:00-07:00\nupdated: 2026-05-28T09:30:00-07:00";
1732        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1733        let created = fm.created.expect("created parsed");
1734        // -07:00 offset is 7 * 3600 seconds west.
1735        assert_eq!(created.offset().utc_minus_local(), 7 * 3600);
1736        assert_eq!(created.to_rfc3339(), "2026-05-27T08:00:00-07:00");
1737        assert!(fm.updated.is_some());
1738    }
1739
1740    #[test]
1741    fn parse_rejects_non_rfc3339_timestamp() {
1742        // A date-only value is not a full RFC3339 timestamp; created/updated
1743        // require the full form.
1744        let yaml = "type: email\ncreated: 2026-05-27";
1745        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1746        match err {
1747            ParseError::BadTimestamp { key, value, .. } => {
1748                assert_eq!(key, "created");
1749                assert_eq!(value, "2026-05-27");
1750            }
1751            other => panic!("expected BadTimestamp, got {other:?}"),
1752        }
1753    }
1754
1755    #[test]
1756    fn parse_malformed_yaml_errors() {
1757        // Unclosed flow mapping is invalid YAML.
1758        let yaml = "type: contact\n  bad: : :\n- nope";
1759        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1760        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1761    }
1762
1763    #[test]
1764    fn frontmatter_with_yaml_tag_on_mapping_does_not_panic() {
1765        // Regression: a YAML tag on the top-level mapping made the old
1766        // `expect_err` path PANIC, because a tagged mapping deserializes to a
1767        // `Mapping` just fine. It must now be handled — accepted as the inner
1768        // mapping, never a panic.
1769        let fm = Frontmatter::parse("!mytag\ntype: contact\nsummary: hi\n", Path::new("x.md"))
1770            .expect("tagged-mapping frontmatter must parse, not panic");
1771        assert_eq!(fm.type_.as_deref(), Some("contact"));
1772        // A genuine scalar/sequence top level is still malformed (and still
1773        // doesn't panic).
1774        assert!(Frontmatter::parse("- a\n- b\n", Path::new("x.md")).is_err());
1775    }
1776
1777    #[test]
1778    fn parse_empty_block_is_empty_frontmatter() {
1779        let fm = Frontmatter::parse("", Path::new("f.md")).unwrap();
1780        assert_eq!(fm, Frontmatter::default());
1781    }
1782
1783    #[test]
1784    fn parse_scalar_top_level_is_malformed() {
1785        // A bare scalar at the top level is not a frontmatter mapping.
1786        let err = Frontmatter::parse("just a string", Path::new("f.md")).unwrap_err();
1787        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1788    }
1789
1790    // ── to_yaml canonical order ──────────────────────────────────────────────
1791
1792    #[test]
1793    fn to_yaml_emits_canonical_key_order() {
1794        let mut fm = Frontmatter {
1795            type_: Some("contact".into()),
1796            id: Some("sarah-chen".into()),
1797            summary: Some("Director of Ops".into()),
1798            status: Some("active".into()),
1799            tags: vec!["vip".into()],
1800            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
1801            updated: Some(DateTime::parse_from_rfc3339("2026-05-28T09:30:00-07:00").unwrap()),
1802            ..Default::default()
1803        };
1804        // Two type-specific fields, inserted in NON-alphabetical order to prove
1805        // the writer sorts them (BTreeMap) between the universal head and tail.
1806        fm.extra
1807            .insert("role".into(), Value::String("Director".into()));
1808        fm.extra.insert(
1809            "company".into(),
1810            Value::String("[[records/companies/northstar]]".into()),
1811        );
1812
1813        let yaml = fm.to_yaml();
1814        let keys: Vec<&str> = yaml
1815            .lines()
1816            .filter(|l| !l.starts_with(['-', ' ']) && l.contains(':'))
1817            .map(|l| l.split(':').next().unwrap())
1818            .collect();
1819        assert_eq!(
1820            keys,
1821            vec![
1822                "type", "id", "created", "updated", "summary", // universal head
1823                "company", "role",   // type-specific, sorted
1824                "status", // universal tail
1825                "tags",
1826            ],
1827            "canonical order violated; got:\n{yaml}"
1828        );
1829        // Timestamps round-trip as RFC3339 strings (YAML may quote them).
1830        assert!(
1831            yaml.contains("2026-05-27T08:00:00-07:00"),
1832            "created timestamp missing; got:\n{yaml}"
1833        );
1834        // The value re-parses to the same instant regardless of quoting.
1835        let reparsed = Frontmatter::parse(&yaml, Path::new("rt.md")).unwrap();
1836        assert_eq!(reparsed.created, fm.created);
1837        assert_eq!(reparsed.updated, fm.updated);
1838    }
1839
1840    #[test]
1841    fn to_yaml_omits_absent_optional_fields() {
1842        let fm = Frontmatter {
1843            type_: Some("note".into()),
1844            ..Default::default()
1845        };
1846        let yaml = fm.to_yaml();
1847        assert!(yaml.contains("type: note"));
1848        assert!(!yaml.contains("status"));
1849        assert!(!yaml.contains("tags"));
1850        assert!(!yaml.contains("summary"));
1851    }
1852
1853    // ── Regression: non-string scalar universal fields round-trip (finding #1) ─
1854
1855    #[test]
1856    fn regression_parse_preserves_non_string_scalar_universal_fields() {
1857        // A hand/externally-authored file whose universal fields are bare
1858        // scalars YAML reads as Number/Bool — `id: 100`, `summary: 2026`,
1859        // `status: 0`, `type: 42` — must be PRESERVED as their string form, not
1860        // read as None. Before the fix, `v.as_str()` returned None for these and
1861        // the matched arm discarded the value entirely (never reaching `extra`).
1862        let yaml = "type: 42\nid: 100\nsummary: 2026\nstatus: 0";
1863        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
1864        assert_eq!(fm.type_.as_deref(), Some("42"), "type scalar dropped");
1865        assert_eq!(fm.id.as_deref(), Some("100"), "id scalar dropped");
1866        assert_eq!(
1867            fm.summary.as_deref(),
1868            Some("2026"),
1869            "summary scalar dropped"
1870        );
1871        assert_eq!(fm.status.as_deref(), Some("0"), "status scalar dropped");
1872        // The values must surface through the public `get` accessor too.
1873        assert_eq!(
1874            fm.get("summary")
1875                .and_then(|v| v.as_str().map(str::to_string)),
1876            Some("2026".to_string())
1877        );
1878    }
1879
1880    #[test]
1881    fn regression_format_round_trip_does_not_delete_numeric_frontmatter() {
1882        // The exact finding-#1 trigger: `dbmd format` is read_file -> write_file.
1883        // A file whose `id`/`summary`/`status` are bare numeric scalars must
1884        // still carry those fields after the canonical re-emit. Before the fix,
1885        // the lines were silently deleted from disk (only `type` survived).
1886        let dir = tempdir().unwrap();
1887        let path = dir.path().join("x.md");
1888        let original = "---\ntype: contact\nid: 100\nsummary: 2026\nstatus: 0\n---\nbody\n";
1889        std::fs::write(&path, original).unwrap();
1890
1891        // Re-emit through the canonical writer, exactly as `dbmd format` does.
1892        let (fm, body) = read_file(&path).unwrap();
1893        write_file(&path, &fm, &body).unwrap();
1894
1895        let after = std::fs::read_to_string(&path).unwrap();
1896        // None of the four fields may vanish; they survive as string scalars.
1897        let reparsed = Frontmatter::parse(
1898            &split_frontmatter(&after, &path).unwrap().frontmatter_yaml,
1899            &path,
1900        )
1901        .unwrap();
1902        assert_eq!(reparsed.type_.as_deref(), Some("contact"));
1903        assert_eq!(reparsed.id.as_deref(), Some("100"), "id deleted by format");
1904        assert_eq!(
1905            reparsed.summary.as_deref(),
1906            Some("2026"),
1907            "summary deleted by format"
1908        );
1909        assert_eq!(
1910            reparsed.status.as_deref(),
1911            Some("0"),
1912            "status deleted by format"
1913        );
1914        // The body is preserved verbatim.
1915        assert_eq!(body, "body\n");
1916    }
1917
1918    // ── Regression: BOM-prefixed files parse like store/index (finding #19) ────
1919
1920    #[test]
1921    fn regression_split_frontmatter_tolerates_leading_utf8_bom() {
1922        // A BOM-prefixed file (EF BB BF + `---\n...`) is walked and indexed by
1923        // `dbmd index` (store/index strip the BOM) but, before the fix, every
1924        // write/edit surface routed through `read_file` hard-failed with
1925        // MissingFrontmatter. `split_frontmatter` must now strip a single leading
1926        // U+FEFF and emit a BOM-free body.
1927        let text = "\u{feff}---\ntype: note\nsummary: x\n---\nbody\n";
1928        let parsed = split_frontmatter(text, Path::new("note.md")).unwrap();
1929        assert_eq!(parsed.frontmatter_yaml, "type: note\nsummary: x\n");
1930        // Body never carries the BOM forward into the canonical writer.
1931        assert_eq!(parsed.body, "body\n");
1932        assert!(!parsed.body.starts_with('\u{feff}'));
1933    }
1934
1935    #[test]
1936    fn regression_read_file_parses_bom_prefixed_file() {
1937        // End-to-end through the same `read_file` path `dbmd fm get/set`,
1938        // `format`, `link`, and `write` use. Before the fix this returned
1939        // Err(MissingFrontmatter) on a file the catalog had already indexed.
1940        let dir = tempdir().unwrap();
1941        let path = dir.path().join("note.md");
1942        std::fs::write(&path, "\u{feff}---\ntype: note\nsummary: x\n---\nbody\n").unwrap();
1943
1944        let (fm, body) = read_file(&path).expect("BOM-prefixed file must parse");
1945        assert_eq!(fm.type_.as_deref(), Some("note"));
1946        assert_eq!(fm.summary.as_deref(), Some("x"));
1947        assert_eq!(body, "body\n");
1948    }
1949
1950    #[test]
1951    fn to_yaml_preserves_unquoted_scalar_wiki_link_round_trip() {
1952        // Regression (PRIMARY): the SPEC-canonical scalar wiki-link is the
1953        // *unquoted* inline `company: [[records/companies/northstar]]`
1954        // (SPEC § Linking, the worked `contact` example). YAML parses it to the
1955        // nested `Seq[Seq[String]]` shape and `parse` stores that verbatim in
1956        // `extra`. Before the fix, `to_yaml` re-emitted it block-style as
1957        //     company:
1958        //     - - records/companies/northstar
1959        // — the `[[ ]]` brackets GONE — so a no-op re-emit (`dbmd format`, and
1960        // any `fm set` / `link` write) silently destroyed the link.
1961        let yaml = "type: contact\ncompany: [[records/companies/northstar]]";
1962        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
1963        // Sanity: it really parsed as the nested sequence, not a string.
1964        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
1965
1966        let out = fm.to_yaml();
1967        // The link must survive as a quoted inline scalar — brackets intact, and
1968        // never the bracket-less block sequence `- - records/...`.
1969        assert!(
1970            out.contains("[[records/companies/northstar]]"),
1971            "canonical writer dropped the wiki-link brackets; got:\n{out}"
1972        );
1973        assert!(
1974            !out.contains("- - "),
1975            "canonical writer emitted a nested block sequence (link corrupted); got:\n{out}"
1976        );
1977
1978        // And it round-trips: re-parsing the emitted YAML still surfaces exactly
1979        // one link with the right target (the edge graph/backlinks rely on).
1980        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
1981        let fields = reparsed.link_fields();
1982        let links: Vec<(&str, &str, Option<&str>)> = fields
1983            .iter()
1984            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
1985            .collect();
1986        assert_eq!(
1987            links,
1988            vec![("company", "records/companies/northstar", None)]
1989        );
1990
1991        // A second re-emit is a fixed point — no progressive corruption across
1992        // repeated curator-loop writes.
1993        assert_eq!(
1994            reparsed.to_yaml(),
1995            out,
1996            "to_yaml is not idempotent on links"
1997        );
1998    }
1999
2000    #[test]
2001    fn to_yaml_preserves_unquoted_scalar_link_with_display() {
2002        // The `|display` segment must survive the unquoted-inline round-trip too.
2003        let yaml = "type: contact\ncompany: [[records/companies/northstar|Northstar]]";
2004        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
2005        let out = fm.to_yaml();
2006        assert!(
2007            out.contains("[[records/companies/northstar|Northstar]]"),
2008            "display segment lost on round-trip; got:\n{out}"
2009        );
2010        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
2011        let f = reparsed.link_fields();
2012        assert_eq!(f.len(), 1);
2013        assert_eq!(f[0].1.target, "records/companies/northstar");
2014        assert_eq!(f[0].1.display.as_deref(), Some("Northstar"));
2015    }
2016
2017    #[test]
2018    fn to_yaml_does_not_mangle_link_list_or_plain_nested_sequence() {
2019        // A genuine quoted block list of links round-trips as a clean string
2020        // list — never collapsed to a scalar — and a plain nested sequence that
2021        // is NOT a wiki-link is left exactly as written (no false conversion).
2022        let yaml = "type: meeting\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nmatrix:\n  - - 1\n    - 2";
2023        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2024        let out = fm.to_yaml();
2025
2026        // Both attendee links survive as quoted strings.
2027        assert!(out.contains("[[records/contacts/elena]]"), "got:\n{out}");
2028        assert!(out.contains("[[records/contacts/sarah]]"), "got:\n{out}");
2029
2030        let reparsed = Frontmatter::parse(&out, Path::new("m.md")).unwrap();
2031        let fields = reparsed.link_fields();
2032        let attendees: Vec<&str> = fields
2033            .iter()
2034            .filter(|(k, _)| k == "attendees")
2035            .map(|(_, l)| l.target.as_str())
2036            .collect();
2037        assert_eq!(
2038            attendees,
2039            vec!["records/contacts/elena", "records/contacts/sarah"]
2040        );
2041        // The non-link nested sequence is preserved verbatim, not touched.
2042        assert_eq!(reparsed.extra.get("matrix"), fm.extra.get("matrix"));
2043    }
2044
2045    // ── read_file / write_file round-trip ────────────────────────────────────
2046
2047    #[test]
2048    fn write_then_read_roundtrips_and_preserves_body_verbatim() {
2049        let dir = tempdir().unwrap();
2050        let path = dir.path().join("sources/emails/x.md");
2051        let body = "# Subject\n\nHello,\n\nSee [[records/contacts/sarah-chen]].\n";
2052        let mut fm = Frontmatter {
2053            type_: Some("email".into()),
2054            summary: Some("renewal note".into()),
2055            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
2056            ..Default::default()
2057        };
2058        fm.extra
2059            .insert("from".into(), Value::String("elena@northstar.io".into()));
2060
2061        write_file(&path, &fm, body).unwrap();
2062
2063        let (read_fm, read_body) = read_file(&path).unwrap();
2064        assert_eq!(read_body, body, "body must be preserved byte-for-byte");
2065        assert_eq!(read_fm.type_.as_deref(), Some("email"));
2066        assert_eq!(read_fm.summary.as_deref(), Some("renewal note"));
2067        assert_eq!(
2068            read_fm.extra.get("from").and_then(|v| v.as_str()),
2069            Some("elena@northstar.io")
2070        );
2071        // The on-disk file starts with a fence and ends with the verbatim body.
2072        let raw = std::fs::read_to_string(&path).unwrap();
2073        assert!(raw.starts_with("---\n"));
2074        assert!(raw.ends_with(body));
2075    }
2076
2077    #[test]
2078    fn roundtrip_modify_summary_then_write_changes_only_summary() {
2079        let dir = tempdir().unwrap();
2080        let path = dir.path().join("records/contacts/sarah.md");
2081        let body = "Long-form operator notes about Sarah.\n";
2082        let fm = Frontmatter {
2083            type_: Some("contact".into()),
2084            summary: Some("old summary".into()),
2085            ..Default::default()
2086        };
2087        write_file(&path, &fm, body).unwrap();
2088
2089        // Read → modify summary → write back.
2090        let (mut fm2, body2) = read_file(&path).unwrap();
2091        fm2.summary = Some("new summary".into());
2092        write_file(&path, &fm2, &body2).unwrap();
2093
2094        let (fm3, body3) = read_file(&path).unwrap();
2095        assert_eq!(fm3.summary.as_deref(), Some("new summary"));
2096        assert_eq!(fm3.type_.as_deref(), Some("contact"));
2097        assert_eq!(body3, body, "body unchanged across the round-trip");
2098    }
2099
2100    #[test]
2101    fn roundtrip_preserves_handwritten_unquoted_scalar_wiki_link_on_disk() {
2102        // End-to-end analog of `dbmd format` on the verbatim SPEC worked example:
2103        // a hand-written file carrying the canonical UNQUOTED scalar link
2104        // `company: [[records/companies/northstar]]`, read from disk then written
2105        // back unchanged. Before the fix this no-op re-emit rewrote the on-disk
2106        // value to the bracket-less block sequence `company:\n- - records/...`,
2107        // and every reader (validate/graph/backlinks) then lost the edge.
2108        let dir = tempdir().unwrap();
2109        let path = dir.path().join("records/contacts/sarah-chen.md");
2110        let file = "---\ntype: contact\nid: sarah-chen\nsummary: Director of Ops\ncompany: [[records/companies/northstar]]\n---\n# Sarah Chen\n\nNotes.\n";
2111        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2112        std::fs::write(&path, file).unwrap();
2113
2114        // Read → write back unchanged (the canonical no-op re-emit).
2115        let (fm, body) = read_file(&path).unwrap();
2116        write_file(&path, &fm, &body).unwrap();
2117
2118        // On-disk bytes still carry the bracketed link, never `- - records/...`.
2119        let raw = std::fs::read_to_string(&path).unwrap();
2120        assert!(
2121            raw.contains("[[records/companies/northstar]]"),
2122            "on-disk wiki-link brackets were destroyed; got:\n{raw}"
2123        );
2124        assert!(
2125            !raw.contains("- - "),
2126            "on-disk value became a nested block sequence; got:\n{raw}"
2127        );
2128
2129        // And the edge is still readable after the round-trip.
2130        let (fm2, _) = read_file(&path).unwrap();
2131        let fields = fm2.link_fields();
2132        let links: Vec<(&str, &str)> = fields
2133            .iter()
2134            .map(|(k, l)| (k.as_str(), l.target.as_str()))
2135            .collect();
2136        assert_eq!(links, vec![("company", "records/companies/northstar")]);
2137    }
2138
2139    #[test]
2140    fn write_file_does_not_leave_temp_files_behind() {
2141        let dir = tempdir().unwrap();
2142        let path = dir.path().join("records/x.md");
2143        let fm = Frontmatter {
2144            type_: Some("note".into()),
2145            ..Default::default()
2146        };
2147        write_file(&path, &fm, "body\n").unwrap();
2148        // The directory should contain only the target file, no `.x.md.tmp.*`.
2149        let entries: Vec<String> = std::fs::read_dir(path.parent().unwrap())
2150            .unwrap()
2151            .map(|e| e.unwrap().file_name().to_string_lossy().into_owned())
2152            .collect();
2153        assert_eq!(entries, vec!["x.md".to_string()]);
2154    }
2155
2156    // ── is_content_file ──────────────────────────────────────────────────────
2157
2158    #[test]
2159    fn is_content_file_recognizes_layers_and_excludes_meta() {
2160        assert!(Frontmatter::is_content_file(Path::new(
2161            "sources/emails/2026-05-22.md"
2162        )));
2163        assert!(Frontmatter::is_content_file(Path::new(
2164            "records/contacts/sarah-chen.md"
2165        )));
2166        assert!(Frontmatter::is_content_file(Path::new(
2167            "wiki/people/sarah-chen.md"
2168        )));
2169        // Absolute paths under a layer are still content.
2170        assert!(Frontmatter::is_content_file(Path::new(
2171            "/home/db/records/companies/northstar.md"
2172        )));
2173        // index.md at any level is meta.
2174        assert!(!Frontmatter::is_content_file(Path::new(
2175            "records/contacts/index.md"
2176        )));
2177        assert!(!Frontmatter::is_content_file(Path::new("index.md")));
2178        // Root meta files.
2179        assert!(!Frontmatter::is_content_file(Path::new("DB.md")));
2180        assert!(!Frontmatter::is_content_file(Path::new("log.md")));
2181    }
2182
2183    // ── effective_id ─────────────────────────────────────────────────────────
2184
2185    #[test]
2186    fn effective_id_prefers_explicit_then_derives_from_path() {
2187        let with_id = Frontmatter {
2188            id: Some("explicit-id".into()),
2189            ..Default::default()
2190        };
2191        assert_eq!(
2192            with_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2193            "explicit-id"
2194        );
2195        let no_id = Frontmatter::default();
2196        assert_eq!(
2197            no_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2198            "sarah-chen"
2199        );
2200    }
2201
2202    // ── get / set ────────────────────────────────────────────────────────────
2203
2204    #[test]
2205    fn set_routes_universal_and_custom_keys() {
2206        let mut fm = Frontmatter::default();
2207        fm.set("type", "contact").unwrap();
2208        fm.set("summary", "hi").unwrap();
2209        fm.set("company", "[[records/companies/northstar]]")
2210            .unwrap();
2211        assert_eq!(fm.type_.as_deref(), Some("contact"));
2212        assert_eq!(fm.summary.as_deref(), Some("hi"));
2213        // Custom key landed in extra, not a typed slot.
2214        assert_eq!(
2215            fm.extra.get("company").and_then(|v| v.as_str()),
2216            Some("[[records/companies/northstar]]")
2217        );
2218        // get reads from both typed fields and extra.
2219        assert_eq!(
2220            fm.get("type").and_then(|v| v.as_str().map(String::from)),
2221            Some("contact".into())
2222        );
2223        assert_eq!(
2224            fm.get("company").and_then(|v| v.as_str().map(String::from)),
2225            Some("[[records/companies/northstar]]".into())
2226        );
2227        assert!(fm.get("nonexistent").is_none());
2228    }
2229
2230    #[test]
2231    fn set_timestamp_validates_rfc3339() {
2232        let mut fm = Frontmatter::default();
2233        fm.set("created", "2026-05-27T08:00:00-07:00").unwrap();
2234        assert!(fm.created.is_some());
2235        let err = fm.set("updated", "not-a-date").unwrap_err();
2236        assert!(matches!(err, ParseError::BadTimestamp { .. }));
2237    }
2238
2239    // ── extract_wiki_links ───────────────────────────────────────────────────
2240
2241    #[test]
2242    fn extract_wiki_links_flags_full_path_short_form_and_extension() {
2243        let body = "See [[records/contacts/sarah-chen]] and [[sarah-chen]].\nAlso [[wiki/people/sarah-chen.md|Sarah]].\n";
2244        let links = extract_wiki_links(body, Path::new("doc.md"));
2245        assert_eq!(links.len(), 3);
2246
2247        // Full path, no extension, no display.
2248        assert_eq!(links[0].target, "records/contacts/sarah-chen");
2249        assert!(links[0].is_full_path);
2250        assert!(!links[0].has_md_extension);
2251        assert_eq!(links[0].display, None);
2252        assert_eq!(links[0].location.1, 1, "first link on line 1");
2253
2254        // Short form: not a full path.
2255        assert_eq!(links[1].target, "sarah-chen");
2256        assert!(!links[1].is_full_path, "bare target is short-form");
2257
2258        // Full path WITH .md extension and a display override on line 2.
2259        assert_eq!(links[2].target, "wiki/people/sarah-chen.md");
2260        assert!(links[2].is_full_path);
2261        assert!(links[2].has_md_extension);
2262        assert_eq!(links[2].display.as_deref(), Some("Sarah"));
2263        assert_eq!(links[2].location.1, 2);
2264    }
2265
2266    #[test]
2267    fn extract_wiki_links_reports_1_based_column_counting_chars() {
2268        // A multi-byte prefix (é is 2 bytes) must not skew the char column.
2269        let body = "café [[records/x/y]]";
2270        let links = extract_wiki_links(body, Path::new("d.md"));
2271        assert_eq!(links.len(), 1);
2272        // "café " is 5 chars, so the `[[` starts at char column 6 (1-based).
2273        assert_eq!(links[0].location.2, 6);
2274    }
2275
2276    #[test]
2277    fn extract_wiki_links_ignores_a_lone_path_without_brackets() {
2278        let links = extract_wiki_links(
2279            "records/contacts/sarah-chen is not a link",
2280            Path::new("d.md"),
2281        );
2282        assert!(links.is_empty());
2283    }
2284
2285    // ── extract_markdown_links ───────────────────────────────────────────────
2286
2287    #[test]
2288    fn extract_markdown_links_captures_external_and_not_wiki_links() {
2289        let body =
2290            "See [the thread](https://x.com/a) and [[records/contacts/sarah-chen]] internally.\n";
2291        let md = extract_markdown_links(body, Path::new("d.md"));
2292        assert_eq!(
2293            md.len(),
2294            1,
2295            "wiki-link must not be captured as a markdown link"
2296        );
2297        assert_eq!(md[0].text, "the thread");
2298        assert_eq!(md[0].url, "https://x.com/a");
2299        assert_eq!(md[0].location.1, 1);
2300
2301        // And the wiki-link extractor must not pick up the markdown link.
2302        let wl = extract_wiki_links(body, Path::new("d.md"));
2303        assert_eq!(wl.len(), 1);
2304        assert_eq!(wl[0].target, "records/contacts/sarah-chen");
2305    }
2306
2307    // ── link_fields ──────────────────────────────────────────────────────────
2308
2309    #[test]
2310    fn link_fields_extracts_scalar_list_and_summary_links() {
2311        // The canonical list form quotes each item so YAML parses it as clean
2312        // strings; a scalar field may be quoted OR written in the canonical
2313        // unquoted inline form `company: [[x]]` (SPEC § Linking).
2314        let yaml = "type: meeting\nsummary: with [[records/contacts/elena]]\ncompany: \"[[records/companies/northstar]]\"\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nnotes: just plain text";
2315        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2316        // Sanity: company really did parse as a scalar string here.
2317        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_some());
2318        let fields = fm.link_fields();
2319
2320        // company (scalar) once, with the right target.
2321        let company: Vec<&str> = fields
2322            .iter()
2323            .filter(|(k, _)| k == "company")
2324            .map(|(_, l)| l.target.as_str())
2325            .collect();
2326        assert_eq!(company, vec!["records/companies/northstar"]);
2327        // attendees (block list) twice.
2328        let attendees: Vec<&str> = fields
2329            .iter()
2330            .filter(|(k, _)| k == "attendees")
2331            .map(|(_, l)| l.target.as_str())
2332            .collect();
2333        assert_eq!(
2334            attendees,
2335            vec!["records/contacts/elena", "records/contacts/sarah"]
2336        );
2337        // summary link surfaced.
2338        assert_eq!(fields.iter().filter(|(k, _)| k == "summary").count(), 1);
2339        // Plain-text field is not a link.
2340        assert_eq!(fields.iter().filter(|(k, _)| k == "notes").count(), 0);
2341    }
2342
2343    #[test]
2344    fn link_fields_surfaces_canonical_unquoted_scalar_link() {
2345        // Regression: the canonical scalar wiki-link form is the *unquoted*
2346        // inline `company: [[records/companies/northstar]]` (SPEC § Linking).
2347        // YAML parses `[[x]]` as a flow-list-in-a-list (`Seq[Seq[String]]`), so
2348        // a naive `as_str()`-only walk drops it. link_fields() must still
2349        // surface exactly one link with the correct target.
2350        let yaml = "type: meeting\ncompany: [[records/companies/northstar]]";
2351        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2352        // Sanity: it really did parse as the nested sequence form, NOT a string.
2353        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
2354
2355        let fields = fm.link_fields();
2356        let links: Vec<(&str, &str, Option<&str>)> = fields
2357            .iter()
2358            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
2359            .collect();
2360        assert_eq!(
2361            links,
2362            vec![("company", "records/companies/northstar", None)]
2363        );
2364
2365        // The `|display` segment survives the unquoted inline form too.
2366        let fm2 = Frontmatter::parse(
2367            "type: meeting\ncompany: [[records/companies/northstar|Northstar]]",
2368            Path::new("m.md"),
2369        )
2370        .unwrap();
2371        let f2 = fm2.link_fields();
2372        assert_eq!(f2.len(), 1);
2373        assert_eq!(f2[0].0, "company");
2374        assert_eq!(f2[0].1.target, "records/companies/northstar");
2375        assert_eq!(f2[0].1.display.as_deref(), Some("Northstar"));
2376    }
2377
2378    #[test]
2379    fn link_fields_ignores_plain_one_item_flow_list() {
2380        // A plain one-item flow list `aliases: [foo]` parses to `Seq[String]`
2381        // — one nesting level shallower than an unquoted `[[foo]]` — and must
2382        // NOT be mistaken for a wiki-link.
2383        let yaml = "type: contact\naliases: [foo]";
2384        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
2385        assert_eq!(fm.link_fields(), Vec::new());
2386    }
2387
2388    // ── detect_flow_form_link_lists ──────────────────────────────────────────
2389
2390    #[test]
2391    fn detect_flow_form_flags_list_misencodings_not_scalars() {
2392        // The flow-form list mis-encoding (triple-nested) IS flagged; a scalar
2393        // inline wiki-link (double-nested) is NOT.
2394        let bad = "attendees: [[[records/x]], [[records/y]]]\nscalar_inline: [[records/z]]";
2395        let flagged = detect_flow_form_link_lists(bad);
2396        assert_eq!(flagged, vec!["attendees".to_string()]);
2397
2398        // An UNquoted block list is also a mis-encoding (parses triple-nested).
2399        let unquoted_block = "attendees:\n  - [[records/x]]\n  - [[records/y]]";
2400        assert_eq!(
2401            detect_flow_form_link_lists(unquoted_block),
2402            vec!["attendees".to_string()]
2403        );
2404
2405        // The canonical QUOTED block form parses to clean strings — NOT flagged.
2406        let good = "attendees:\n  - \"[[records/x]]\"\n  - \"[[records/y]]\"";
2407        assert!(detect_flow_form_link_lists(good).is_empty());
2408
2409        // A plain scalar list of strings is not flagged.
2410        let plain = "tags: [a, b, c]";
2411        assert!(detect_flow_form_link_lists(plain).is_empty());
2412    }
2413
2414    // ── extract_sections ─────────────────────────────────────────────────────
2415
2416    #[test]
2417    fn extract_sections_levels_nesting_and_boundaries() {
2418        let body = "intro text\n## First\nalpha\n### Sub\nbeta\n## Second\ngamma\n";
2419        let secs = extract_sections(body);
2420        let headings: Vec<(&str, u8)> =
2421            secs.iter().map(|s| (s.heading.as_str(), s.level)).collect();
2422        assert_eq!(headings, vec![("First", 2), ("Sub", 3), ("Second", 2)]);
2423
2424        // "First" (H2) body extends through its H3 child, stopping at "Second".
2425        let first = &secs[0];
2426        assert!(first.body.contains("alpha"));
2427        assert!(first.body.contains("### Sub"));
2428        assert!(first.body.contains("beta"));
2429        assert!(!first.body.contains("Second"));
2430
2431        // "Sub" (H3) stops at the next equal-or-shallower heading ("Second").
2432        let sub = &secs[1];
2433        assert!(sub.body.contains("beta"));
2434        assert!(!sub.body.contains("gamma"));
2435
2436        // 1-based line numbers within the body.
2437        assert_eq!(first.line, 2);
2438        assert_eq!(secs[2].line, 6);
2439    }
2440
2441    #[test]
2442    fn extract_sections_ignores_headings_in_fenced_code() {
2443        let body = "## Real\n```\n## Fake heading in code\n```\nafter\n";
2444        let secs = extract_sections(body);
2445        assert_eq!(secs.len(), 1);
2446        assert_eq!(secs[0].heading, "Real");
2447        // The fenced "## Fake" is part of Real's body, not its own section.
2448        assert!(secs[0].body.contains("## Fake heading in code"));
2449    }
2450
2451    // ── parse_field_spec ─────────────────────────────────────────────────────
2452
2453    #[test]
2454    fn parse_field_spec_required_and_shape() {
2455        let f = parse_field_spec("- email (required, email)");
2456        assert_eq!(f.name, "email");
2457        assert!(f.required);
2458        assert_eq!(f.shape, Some(Shape::Email));
2459        assert!(f.unknown_modifiers.is_empty());
2460    }
2461
2462    #[test]
2463    fn parse_field_spec_link_prefix_strips_trailing_slash() {
2464        let f = parse_field_spec("- company (required, link to records/companies/)");
2465        assert!(f.required);
2466        assert_eq!(f.link_prefix, Some(PathBuf::from("records/companies")));
2467        assert_eq!(f.shape, None);
2468    }
2469
2470    #[test]
2471    fn parse_field_spec_default_preserves_case_and_value() {
2472        let f = parse_field_spec("- currency (default USD)");
2473        assert_eq!(f.name, "currency");
2474        assert_eq!(f.default, Some(Value::String("USD".into())));
2475    }
2476
2477    #[test]
2478    fn parse_field_spec_enum_captures_comma_list_as_last_modifier() {
2479        let f = parse_field_spec("- status (required, enum: open, closed, pending)");
2480        assert!(f.required);
2481        assert_eq!(
2482            f.enum_values,
2483            Some(vec![
2484                "open".to_string(),
2485                "closed".to_string(),
2486                "pending".to_string()
2487            ])
2488        );
2489    }
2490
2491    #[test]
2492    fn parse_field_spec_bare_enum_keyword_is_not_itself_a_value() {
2493        // `enum` with no colon: the values are the remaining tokens; the keyword
2494        // itself must NOT leak in as an allowed value.
2495        let f = parse_field_spec("- status (required, enum, open, closed)");
2496        assert!(f.required);
2497        assert_eq!(
2498            f.enum_values,
2499            Some(vec!["open".to_string(), "closed".to_string()])
2500        );
2501    }
2502
2503    #[test]
2504    fn parse_field_spec_unknown_modifier_is_captured_not_errored() {
2505        let f = parse_field_spec("- weird (required, frobnicate, string)");
2506        assert!(f.required);
2507        assert_eq!(f.shape, Some(Shape::String));
2508        assert_eq!(f.unknown_modifiers, vec!["frobnicate".to_string()]);
2509    }
2510
2511    #[test]
2512    fn parse_field_spec_no_parens_is_freeform_optional() {
2513        let f = parse_field_spec("- nickname");
2514        assert_eq!(f.name, "nickname");
2515        assert!(!f.required);
2516        assert_eq!(f.shape, None);
2517        assert!(f.link_prefix.is_none());
2518        assert!(f.enum_values.is_none());
2519        assert!(f.unknown_modifiers.is_empty());
2520    }
2521
2522    // ── parse_schema_bullet (directives) ─────────────────────────────────────
2523
2524    #[test]
2525    fn schema_bullet_unique_single_field() {
2526        match parse_schema_bullet("- unique: email") {
2527            SchemaBullet::Unique(fields) => assert_eq!(fields, vec!["email".to_string()]),
2528            other => panic!("expected Unique, got {other:?}"),
2529        }
2530    }
2531
2532    #[test]
2533    fn schema_bullet_unique_compound_trims_and_splits() {
2534        match parse_schema_bullet("- unique: date, amount , vendor") {
2535            SchemaBullet::Unique(fields) => assert_eq!(
2536                fields,
2537                vec![
2538                    "date".to_string(),
2539                    "amount".to_string(),
2540                    "vendor".to_string()
2541                ]
2542            ),
2543            other => panic!("expected Unique, got {other:?}"),
2544        }
2545    }
2546
2547    #[test]
2548    fn schema_bullet_summary_template_keeps_braces_and_inner_colons() {
2549        match parse_schema_bullet("- summary_template: {role} at {company} (x: y)") {
2550            SchemaBullet::SummaryTemplate(t) => assert_eq!(t, "{role} at {company} (x: y)"),
2551            other => panic!("expected SummaryTemplate, got {other:?}"),
2552        }
2553    }
2554
2555    #[test]
2556    fn schema_bullet_field_with_enum_modifier_is_not_a_directive() {
2557        // A field whose modifiers contain a colon (`enum:`) parses as a field, not
2558        // a directive — its head has a `(` before any `:`.
2559        match parse_schema_bullet("- status (enum: open, closed)") {
2560            SchemaBullet::Field(f) => {
2561                assert_eq!(f.name, "status");
2562                assert_eq!(
2563                    f.enum_values,
2564                    Some(vec!["open".to_string(), "closed".to_string()])
2565                );
2566            }
2567            other => panic!("expected Field, got {other:?}"),
2568        }
2569    }
2570
2571    #[test]
2572    fn parse_db_md_schema_captures_unique_and_summary_template() {
2573        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### contact\n- email (required, email)\n- unique: email\n- summary_template: {role} at {company}\n";
2574        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
2575        let s = config.schemas.get("contact").expect("contact schema");
2576        assert_eq!(s.fields.len(), 1, "directives are not parsed as fields");
2577        assert_eq!(s.unique_keys, vec![vec!["email".to_string()]]);
2578        assert_eq!(s.summary_template.as_deref(), Some("{role} at {company}"));
2579    }
2580
2581    #[test]
2582    fn schema_bullet_shard_directive_parses_values() {
2583        assert!(matches!(
2584            parse_schema_bullet("- shard: by-date"),
2585            SchemaBullet::Shard(Some(true))
2586        ));
2587        assert!(matches!(
2588            parse_schema_bullet("- shard: flat"),
2589            SchemaBullet::Shard(Some(false))
2590        ));
2591        // An unrecognized value is ignored (None), like an unknown modifier.
2592        assert!(matches!(
2593            parse_schema_bullet("- shard: weekly"),
2594            SchemaBullet::Shard(None)
2595        ));
2596        // A field whose name has a `(` before any `:` is still a field — the same
2597        // guard that keeps `- status (enum: a, b)` a field, not a directive.
2598        assert!(matches!(
2599            parse_schema_bullet("- shardiness (string)"),
2600            SchemaBullet::Field(_)
2601        ));
2602    }
2603
2604    #[test]
2605    fn parse_db_md_schema_captures_shard_directive() {
2606        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### shipment\n- carrier (string)\n- shard: by-date\n\n### contact\n- shard: flat\n";
2607        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
2608        let shipment = config.schemas.get("shipment").expect("shipment schema");
2609        assert_eq!(shipment.shard, Some(true));
2610        assert_eq!(
2611            shipment.fields.len(),
2612            1,
2613            "`shard:` is a directive, not a field"
2614        );
2615        assert_eq!(config.schemas.get("contact").unwrap().shard, Some(false));
2616    }
2617
2618    // ── parse_db_md ──────────────────────────────────────────────────────────
2619
2620    const CANONICAL_DB_MD: &str = "---\ntype: db-md\nscope: company\nowner: Sarah Chen\n---\n\n# Acme operations knowledge base\n\nCompany-scale institutional memory for Acme.\n\n## Agent instructions\n\nPrioritize creating `contact` records from new-sender emails. Use British English.\n\n## Policies\n\n### Frozen pages\n- `records/decisions/2026-q1-strategy.md` — finalized, do not modify.\n- `wiki/synthesis/2026-annual-plan.md` — signed-off plan.\n\n### Ignored types\n- `test`, `temp` — read but never synthesize.\n\n## Schemas\n\n### contact\n- name (required)\n- email (required, email)\n- company (required, link to records/companies/)\n- role (string)\n\n### expense\n- date (required, date)\n- amount (required)\n- currency (default USD)\n";
2621
2622    #[test]
2623    fn parse_db_md_extracts_all_canonical_sections() {
2624        let config = parse_db_md(CANONICAL_DB_MD, Path::new("DB.md")).unwrap();
2625
2626        // Agent instructions: free-form prose, heading line stripped.
2627        let ai = config
2628            .agent_instructions
2629            .expect("agent instructions present");
2630        assert!(ai.starts_with("Prioritize creating"));
2631        assert!(!ai.contains("## Agent instructions"));
2632
2633        // Frozen pages: paths extracted from backticked bullets, comments dropped.
2634        assert_eq!(
2635            config.frozen_pages,
2636            vec![
2637                PathBuf::from("records/decisions/2026-q1-strategy.md"),
2638                PathBuf::from("wiki/synthesis/2026-annual-plan.md"),
2639            ]
2640        );
2641
2642        // Ignored types: comma list, backticks/comment stripped.
2643        assert_eq!(
2644            config.ignored_types,
2645            vec!["test".to_string(), "temp".to_string()]
2646        );
2647
2648        // Schemas: two types, each with its fields in source order.
2649        assert_eq!(config.schemas.len(), 2);
2650        let contact = config.schemas.get("contact").expect("contact schema");
2651        let names: Vec<&str> = contact.fields.iter().map(|f| f.name.as_str()).collect();
2652        assert_eq!(names, vec!["name", "email", "company", "role"]);
2653        assert!(contact.fields[0].required); // name
2654        assert_eq!(contact.fields[1].shape, Some(Shape::Email)); // email
2655        assert_eq!(
2656            contact.fields[2].link_prefix,
2657            Some(PathBuf::from("records/companies"))
2658        ); // company
2659
2660        let expense = config.schemas.get("expense").expect("expense schema");
2661        let cur = expense
2662            .fields
2663            .iter()
2664            .find(|f| f.name == "currency")
2665            .unwrap();
2666        assert_eq!(cur.default, Some(Value::String("USD".into())));
2667    }
2668
2669    #[test]
2670    fn parse_db_md_handles_malformed_and_unknown_modifiers() {
2671        // corpus-b shape: a `## Schemas` section with a malformed bullet, an
2672        // unknown modifier, and bullets that appear with NO `### <type>`
2673        // heading (so they belong to no schema and are dropped).
2674        let text = "---\ntype: db-md\n---\n\n## Schemas\n- orphan (required)\n\n### ticket\n- priority (required, mystery, enum: low, high)\n- broken (\n";
2675        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
2676
2677        // The orphan bullet under `## Schemas` with no `### type` heading is not
2678        // captured as a schema.
2679        assert_eq!(config.schemas.len(), 1);
2680        let ticket = config.schemas.get("ticket").expect("ticket schema");
2681        assert_eq!(ticket.fields.len(), 2);
2682
2683        let priority = &ticket.fields[0];
2684        assert!(priority.required);
2685        assert_eq!(priority.unknown_modifiers, vec!["mystery".to_string()]);
2686        assert_eq!(
2687            priority.enum_values,
2688            Some(vec!["low".to_string(), "high".to_string()])
2689        );
2690
2691        // A bullet with an unclosed paren still yields a usable name.
2692        let broken = &ticket.fields[1];
2693        assert_eq!(broken.name, "broken");
2694    }
2695
2696    #[test]
2697    fn parse_db_md_missing_frontmatter_errors() {
2698        let text = "# No frontmatter\n\n## Agent instructions\nhi\n";
2699        let err = parse_db_md(text, Path::new("DB.md")).unwrap_err();
2700        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
2701    }
2702
2703    #[test]
2704    fn parse_db_md_absent_sections_default_empty() {
2705        let text = "---\ntype: db-md\n---\n\n# Title only\n";
2706        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
2707        assert_eq!(config, Config::default());
2708    }
2709
2710    // ── fm set / --fm list-valued link fields (meeting.attendees & friends) ──
2711
2712    /// `Frontmatter::set` is the value path every write surface (`fm set`,
2713    /// `write --fm`) funnels through. A list-of-wiki-links value (the SPEC's
2714    /// `meeting.attendees` shape) must serialize as a YAML **block sequence** of
2715    /// quoted links — readable back by [`links_in_field_value`] and accepted by
2716    /// `dbmd validate` — never the flow-form scalar string that trips
2717    /// `WIKI_LINK_FLOW_FORM_LIST`. Both the unquoted (`[[[a]], [[b]]]`) and
2718    /// quoted (`["[[a]]", "[[b]]"]`) spellings an agent types must normalize.
2719    #[test]
2720    fn set_list_of_wiki_links_becomes_block_sequence_both_spellings() {
2721        for value in [
2722            "[[[records/contacts/a]], [[records/contacts/b]]]",
2723            r#"["[[records/contacts/a]]", "[[records/contacts/b]]"]"#,
2724        ] {
2725            let mut fm = Frontmatter::default();
2726            fm.set("attendees", value).unwrap();
2727
2728            // Stored as a 2-element sequence of clean quoted links.
2729            let stored = fm.extra.get("attendees").expect("attendees set");
2730            let Value::Sequence(items) = stored else {
2731                panic!("attendees must be a Sequence, got {stored:?} for input {value}");
2732            };
2733            assert_eq!(items.len(), 2, "input {value}");
2734            assert_eq!(items[0], Value::String("[[records/contacts/a]]".into()));
2735            assert_eq!(items[1], Value::String("[[records/contacts/b]]".into()));
2736
2737            // The edge enumerator reads exactly the two links back (no stray
2738            // bracket targets, the flow-form-string symptom).
2739            let links: Vec<_> = links_in_field_value(stored)
2740                .into_iter()
2741                .map(|l| l.target)
2742                .collect();
2743            assert_eq!(
2744                links,
2745                vec!["records/contacts/a", "records/contacts/b"],
2746                "input {value}"
2747            );
2748
2749            // And the canonical writer renders it block-style, not as a scalar.
2750            let yaml = fm.to_yaml();
2751            assert!(
2752                yaml.contains("attendees:\n"),
2753                "expected block list in:\n{yaml}"
2754            );
2755            assert!(
2756                !yaml.contains("attendees: '[["),
2757                "must not be a flow-form scalar string in:\n{yaml}"
2758            );
2759        }
2760    }
2761
2762    /// A *single* inline wiki-link stays a scalar string (renders inline
2763    /// `field: [[x]]`), and a single link must never be widened to a one-item
2764    /// list — preserving the common `contact.company` / `expense.vendor` shape.
2765    #[test]
2766    fn set_single_inline_wiki_link_stays_scalar() {
2767        let mut fm = Frontmatter::default();
2768        fm.set("company", "[[records/companies/tideform]]").unwrap();
2769        assert_eq!(
2770            fm.extra.get("company"),
2771            Some(&Value::String("[[records/companies/tideform]]".into())),
2772        );
2773        // Still recognized as one link.
2774        let links: Vec<_> = links_in_field_value(fm.extra.get("company").unwrap())
2775            .into_iter()
2776            .map(|l| l.target)
2777            .collect();
2778        assert_eq!(links, vec!["records/companies/tideform"]);
2779    }
2780
2781    /// Plain text and a non-link flow list are left as verbatim scalar strings —
2782    /// the list normalization only triggers when every item is a clean wiki-link.
2783    #[test]
2784    fn set_non_link_values_stay_scalar_strings() {
2785        let mut fm = Frontmatter::default();
2786        fm.set("location", "Video call (remote)").unwrap();
2787        assert_eq!(
2788            fm.extra.get("location"),
2789            Some(&Value::String("Video call (remote)".into())),
2790        );
2791
2792        // A flow list whose items are NOT wiki-links must not be reinterpreted as
2793        // a link sequence; it stays the scalar string the agent passed.
2794        fm.set("note", "[draft, wip]").unwrap();
2795        assert_eq!(
2796            fm.extra.get("note"),
2797            Some(&Value::String("[draft, wip]".into()))
2798        );
2799    }
2800}