Skip to main content

dbmd_core/
parser.rs

1//! `parser` — read and write db.md markdown files.
2//!
3//! Parses the YAML frontmatter block, the markdown body, wiki-links, standard
4//! markdown links, `##` sections, and the structured sections of the `DB.md`
5//! config file. Also the atomic writer that round-trips a file while
6//! preserving the operator-edited body verbatim and emitting frontmatter in
7//! canonical key order.
8//!
9//! Strict on required fields, lenient on unknowns: any frontmatter key the
10//! spec doesn't recognize is preserved in [`Frontmatter::extra`] as ambient
11//! context and round-tripped untouched.
12
13use std::collections::BTreeMap;
14use std::path::{Path, PathBuf};
15
16use chrono::{DateTime, FixedOffset};
17use serde_yml::{Mapping, Value};
18
/// Folder names of the three canonical store layers.
///
/// A path counts as "content", and a wiki-link target as "full-path", only
/// when it resolves under one of these directories.
const LAYER_DIRS: [&str; 3] = [
    "sources",
    "records",
    "wiki",
];
22
23/// Errors produced while parsing a markdown file or the `DB.md` config.
24#[derive(Debug, thiserror::Error)]
25pub enum ParseError {
26    /// The frontmatter block was not valid YAML. Maps to validate code
27    /// `FM_MALFORMED_YAML`.
28    #[error("malformed YAML frontmatter in {file}: {source}")]
29    MalformedYaml {
30        /// The file whose frontmatter failed to parse.
31        file: PathBuf,
32        /// The underlying YAML error.
33        source: serde_yml::Error,
34    },
35
36    /// The file has no `---`-delimited frontmatter block at its very start.
37    #[error("missing frontmatter block in {file}")]
38    MissingFrontmatter {
39        /// The offending file.
40        file: PathBuf,
41    },
42
43    /// A required field was absent. Maps to validate code `FM_MISSING_TYPE`
44    /// (for `type`) and the per-type required-field codes.
45    #[error("missing required field '{key}' in {file}")]
46    MissingField {
47        /// The file missing the field.
48        file: PathBuf,
49        /// The required key.
50        key: String,
51    },
52
53    /// A timestamp field was not ISO-8601 / RFC3339. Maps to `FM_BAD_TIMESTAMP`.
54    #[error("bad timestamp in field '{key}' of {file}: {value}")]
55    BadTimestamp {
56        /// The file.
57        file: PathBuf,
58        /// The frontmatter key.
59        key: String,
60        /// The unparseable value.
61        value: String,
62    },
63
64    /// An I/O error reading the file.
65    #[error(transparent)]
66    Io(#[from] std::io::Error),
67}
68
69/// The parsed YAML frontmatter of a db.md file.
70///
71/// The universal-contract fields are typed accessors; everything else lands in
72/// [`extra`](Frontmatter::extra) as ambient context (unknown-field passthrough)
73/// and is round-tripped verbatim. The atomic writer re-emits keys in canonical
74/// order: `type`, `id`, `created`, `updated`, `summary` first, then
75/// type-specific fields, then `status` / `tags`.
76#[derive(Debug, Clone, Default, PartialEq)]
77pub struct Frontmatter {
78    /// `type` — required on content files; the primary query key.
79    pub type_: Option<String>,
80    /// `id` — optional; derived from the file path when absent.
81    pub id: Option<String>,
82    /// `created` — RFC3339; required and auto-set on content-file create.
83    pub created: Option<DateTime<FixedOffset>>,
84    /// `updated` — RFC3339; required and auto-maintained on content files.
85    pub updated: Option<DateTime<FixedOffset>>,
86    /// `summary` — the one-line catalog line; required on every content file.
87    pub summary: Option<String>,
88    /// `status` — optional lifecycle state.
89    pub status: Option<String>,
90    /// `tags` — optional flat list of short scalar labels.
91    pub tags: Vec<String>,
92    /// All other frontmatter keys (type-specific + custom), preserved verbatim
93    /// in insertion-stable sorted order. Wiki-link-valued fields keep their raw
94    /// YAML form here; [`Frontmatter::link_fields`] surfaces them as
95    /// [`WikiLink`]s.
96    pub extra: BTreeMap<String, Value>,
97}
98
99impl Frontmatter {
100    /// Parse a YAML frontmatter block (the text between the opening and closing
101    /// `---` fences, exclusive) into a [`Frontmatter`].
102    ///
103    /// Lenient on unknown keys (they go to [`extra`](Frontmatter::extra));
104    /// returns [`ParseError::MalformedYaml`] only on YAML that doesn't parse.
105    pub fn parse(yaml: &str, file: &Path) -> Result<Self, ParseError> {
106        // An empty (or whitespace-only) frontmatter block is a valid, empty
107        // mapping — not a YAML error.
108        let value: Value = if yaml.trim().is_empty() {
109            Value::Mapping(Mapping::new())
110        } else {
111            serde_yml::from_str(yaml).map_err(|source| ParseError::MalformedYaml {
112                file: file.to_path_buf(),
113                source,
114            })?
115        };
116
117        // Top-level frontmatter must be a mapping. A scalar or sequence at the
118        // top level is malformed for our purposes; surface it as such.
119        let map = match value {
120            Value::Mapping(m) => m,
121            Value::Null => Mapping::new(),
122            other => {
123                // serde_yml::Error has no public constructor, so manufacture a
124                // representative one by deserializing the (sequence/scalar)
125                // value into a Mapping, which always fails with a type error.
126                let source = serde_yml::from_value::<Mapping>(other)
127                    .expect_err("non-mapping frontmatter top level deserializes to Mapping");
128                return Err(ParseError::MalformedYaml {
129                    file: file.to_path_buf(),
130                    source,
131                });
132            }
133        };
134
135        let mut fm = Frontmatter::default();
136        for (k, v) in map {
137            let key = match k.as_str() {
138                Some(s) => s.to_string(),
139                // Non-string keys are unusual; stringify defensively and keep
140                // them in `extra` so nothing is silently dropped.
141                None => format!("{k:?}"),
142            };
143            match key.as_str() {
144                "type" => fm.type_ = v.as_str().map(str::to_string),
145                "id" => fm.id = v.as_str().map(str::to_string),
146                "created" => fm.created = parse_timestamp(&v, "created", file)?,
147                "updated" => fm.updated = parse_timestamp(&v, "updated", file)?,
148                "summary" => fm.summary = v.as_str().map(str::to_string),
149                "status" => fm.status = v.as_str().map(str::to_string),
150                "tags" => fm.tags = parse_tags(&v),
151                _ => {
152                    fm.extra.insert(key, v);
153                }
154            }
155        }
156        Ok(fm)
157    }
158
159    /// Serialize the frontmatter back to a YAML block (no `---` fences) in
160    /// canonical key order. Round-trips [`extra`](Frontmatter::extra) verbatim.
161    pub fn to_yaml(&self) -> String {
162        // Build an order-preserving mapping in canonical key order:
163        //   type, id, created, updated, summary  (universal head)
164        //   <type-specific extra, BTreeMap-sorted>
165        //   status, tags                          (universal tail)
166        // serde_yml::Mapping preserves insertion order, so one serialize call
167        // emits the block in exactly this order with correct YAML quoting.
168        let mut map = Mapping::new();
169
170        if let Some(t) = &self.type_ {
171            map.insert(Value::String("type".into()), Value::String(t.clone()));
172        }
173        if let Some(id) = &self.id {
174            map.insert(Value::String("id".into()), Value::String(id.clone()));
175        }
176        if let Some(created) = &self.created {
177            map.insert(
178                Value::String("created".into()),
179                Value::String(created.to_rfc3339()),
180            );
181        }
182        if let Some(updated) = &self.updated {
183            map.insert(
184                Value::String("updated".into()),
185                Value::String(updated.to_rfc3339()),
186            );
187        }
188        if let Some(summary) = &self.summary {
189            map.insert(
190                Value::String("summary".into()),
191                Value::String(summary.clone()),
192            );
193        }
194
195        // Type-specific + custom fields, in BTreeMap (sorted) order. Each value
196        // is canonicalized so a wiki-link round-trips to the form the writer and
197        // `dbmd validate` agree on — critically, the SPEC-canonical *unquoted*
198        // scalar `field: [[x]]` (which YAML parses to a nested `Seq[Seq[String]]`)
199        // is re-emitted as a quoted scalar `'[[x]]'` instead of the bracket-less
200        // block sequence `- - x` that a verbatim re-emit would produce and that
201        // destroys the link. See [`canonicalize_extra_value`].
202        for (k, v) in &self.extra {
203            map.insert(Value::String(k.clone()), canonicalize_extra_value(v));
204        }
205
206        if let Some(status) = &self.status {
207            map.insert(
208                Value::String("status".into()),
209                Value::String(status.clone()),
210            );
211        }
212        if !self.tags.is_empty() {
213            map.insert(
214                Value::String("tags".into()),
215                Value::Sequence(self.tags.iter().cloned().map(Value::String).collect()),
216            );
217        }
218
219        if map.is_empty() {
220            return String::new();
221        }
222        serde_yml::to_string(&Value::Mapping(map)).unwrap_or_default()
223    }
224
225    /// True if the file is content (under `sources/`, `records/`, or `wiki/`)
226    /// and not an `index.md`. Used by validate to decide which files require a
227    /// `summary`. Meta files (`DB.md`, `index.md`, `log.md`) return false.
228    pub fn is_content_file(path: &Path) -> bool {
229        // index.md is a meta file at every level, never content.
230        if path.file_name().and_then(|n| n.to_str()) == Some("index.md") {
231            return false;
232        }
233        // Content iff some path component is one of the three layer dirs. This
234        // works for both store-relative (`sources/emails/x.md`) and absolute
235        // (`/home/db/sources/emails/x.md`) paths. DB.md / log.md sit at the
236        // root, under no layer, so they fall through to false.
237        path.components().any(|c| {
238            c.as_os_str()
239                .to_str()
240                .is_some_and(|s| LAYER_DIRS.contains(&s))
241        })
242    }
243
244    /// Resolve the file's effective `id`: the explicit `id` field if present,
245    /// otherwise derived from the store-relative path (filename without `.md`).
246    pub fn effective_id(&self, store_relative_path: &Path) -> String {
247        if let Some(id) = &self.id {
248            if !id.is_empty() {
249                return id.clone();
250            }
251        }
252        // Derived id = filename without the `.md` extension.
253        store_relative_path
254            .file_stem()
255            .and_then(|s| s.to_str())
256            .unwrap_or_default()
257            .to_string()
258    }
259
260    /// Read a single frontmatter key as a raw YAML [`Value`], looking in the
261    /// typed fields first and then [`extra`](Frontmatter::extra).
262    pub fn get(&self, key: &str) -> Option<Value> {
263        match key {
264            "type" => self.type_.clone().map(Value::String),
265            "id" => self.id.clone().map(Value::String),
266            "created" => self.created.map(|d| Value::String(d.to_rfc3339())),
267            "updated" => self.updated.map(|d| Value::String(d.to_rfc3339())),
268            "summary" => self.summary.clone().map(Value::String),
269            "status" => self.status.clone().map(Value::String),
270            "tags" => {
271                if self.tags.is_empty() {
272                    None
273                } else {
274                    Some(Value::Sequence(
275                        self.tags.iter().cloned().map(Value::String).collect(),
276                    ))
277                }
278            }
279            _ => self.extra.get(key).cloned(),
280        }
281    }
282
283    /// Set a single frontmatter key from a string value, routing universal-
284    /// contract keys to their typed fields and everything else to
285    /// [`extra`](Frontmatter::extra). Used by `dbmd fm set`.
286    pub fn set(&mut self, key: &str, value: &str) -> Result<(), ParseError> {
287        match key {
288            "type" => self.type_ = Some(value.to_string()),
289            "id" => self.id = Some(value.to_string()),
290            "created" => {
291                self.created = Some(parse_rfc3339(value, "created", Path::new("<fm set>"))?)
292            }
293            "updated" => {
294                self.updated = Some(parse_rfc3339(value, "updated", Path::new("<fm set>"))?)
295            }
296            "summary" => self.summary = Some(value.to_string()),
297            "status" => self.status = Some(value.to_string()),
298            "tags" => {
299                // Accept either a YAML flow list (`[a, b]`) or a single scalar
300                // tag. Anything that parses to a sequence becomes the tag list;
301                // otherwise the whole string is one tag.
302                self.tags = match serde_yml::from_str::<Value>(value) {
303                    Ok(Value::Sequence(seq)) => parse_tags(&Value::Sequence(seq)),
304                    _ => vec![value.to_string()],
305                };
306            }
307            _ => {
308                // A custom / type-specific field. The value is a scalar string by
309                // default, but the spec's list-valued link fields (e.g.
310                // `meeting.attendees`, SPEC § Linking) must serialize as a YAML
311                // block sequence of quoted wiki-links — never the flow-form string
312                // `"[[[a]], [[b]]]"`, which `dbmd validate` rejects as
313                // `WIKI_LINK_FLOW_FORM_LIST`. When the value parses as a YAML
314                // sequence whose every item is a clean single wiki-link, store the
315                // canonical sequence so `to_yaml` emits block form. Everything else
316                // — plain text, and a single inline `[[x]]` (which YAML reads as a
317                // nested `Seq[Seq[String]]`, not a list of link strings) — stays a
318                // verbatim scalar string, preserving the prior behavior.
319                let stored = parse_link_list_value(value)
320                    .unwrap_or_else(|| Value::String(value.to_string()));
321                self.extra.insert(key.to_string(), stored);
322            }
323        }
324        Ok(())
325    }
326
327    /// Extract every frontmatter field whose value is a wiki-link (scalar
328    /// inline form or a block-sequence list), pairing each with its key. The
329    /// validate engine checks these against `(link)` schema annotations.
330    pub fn link_fields(&self) -> Vec<(String, WikiLink)> {
331        let mut out = Vec::new();
332        // `summary` may carry navigational wiki-links (spec encourages it).
333        if let Some(summary) = &self.summary {
334            for link in extract_wiki_links(summary, Path::new("")) {
335                out.push(("summary".to_string(), link));
336            }
337        }
338        // Every type-specific / custom field: a scalar wiki-link or a list of
339        // wiki-links, in either the quoted (`"[[x]]"`) or the canonical unquoted
340        // (`[[x]]`) form. See [`links_in_field_value`] for the YAML shapes.
341        for (key, value) in &self.extra {
342            for link in links_in_field_value(value) {
343                out.push((key.clone(), link));
344            }
345        }
346        out
347    }
348}
349
/// An in-store reference written as `[[target]]` or `[[target|display]]`.
///
/// The `target` is stored exactly as written.
/// [`is_full_path`](WikiLink::is_full_path) records whether it is a full
/// store-relative path (the doctrine) or a short form (a validation error).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct WikiLink {
    /// Target text as written, stripped of the `[[ ]]` and of any `|display`.
    pub target: String,
    /// The `|display` text override, when one is given.
    pub display: Option<String>,
    /// `true` for a full store-relative path (contains a `/` and resolves
    /// under a known layer); `false` for a short form such as `sarah-chen`,
    /// which validate reports as `WIKI_LINK_SHORT_FORM`.
    pub is_full_path: bool,
    /// `true` when the target ends in `.md`. Validate warns
    /// `WIKI_LINK_HAS_EXTENSION`; canonical writers emit the bare form.
    pub has_md_extension: bool,
    /// Position of the link as `(file, line, col)`; line and column are
    /// 1-based.
    pub location: (PathBuf, u32, u32),
}
371
/// A standard markdown link, `[text](url)`, treated as an external reference.
///
/// These are reported in their own stream, apart from [`WikiLink`], so that
/// external targets stay visible to the toolkit without being mistaken for
/// in-store edges. They are not graph-validated.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MarkdownLink {
    /// Link text found between `[` and `]`.
    pub text: String,
    /// URL or path found between `(` and `)`.
    pub url: String,
    /// Position of the link as `(file, line, col)`, 1-based.
    pub location: (PathBuf, u32, u32),
}
384
/// One `##`/`###` section of a markdown body: its heading plus the part of
/// the body it covers (from the heading line up to, but not including, the
/// next heading of equal or shallower depth).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Section {
    /// Heading text with the leading `#`s removed.
    pub heading: String,
    /// Depth of the heading, i.e. how many `#`s lead it.
    pub level: u8,
    /// 1-based line number of the heading.
    pub line: u32,
    /// Text of the section, starting at the heading line and ending before
    /// the next sibling-or-shallower heading, copied from the original body.
    pub body: String,
}
400
401/// The parsed structured content of a store's `DB.md` config file.
402///
403/// All four parts are optional in the source; absent parts fall back to spec
404/// defaults. Produced by [`parse_db_md`].
405#[derive(Debug, Clone, Default, PartialEq)]
406pub struct Config {
407    /// Body of the `## Agent instructions` section — free-form prose passed to
408    /// the agent's system prompt.
409    pub agent_instructions: Option<String>,
410    /// `## Policies` → `### Frozen pages`: store-relative paths the toolkit
411    /// refuses to write (`POLICY_FROZEN_PAGE`).
412    pub frozen_pages: Vec<PathBuf>,
413    /// `## Policies` → `### Ignored types`: type names the curator never
414    /// synthesizes (still readable as ambient context).
415    pub ignored_types: Vec<String>,
416    /// `## Schemas` → one entry per `### <type>` sub-section.
417    pub schemas: BTreeMap<String, Schema>,
418}
419
420impl Config {
421    /// The `### Frozen pages` entry that matches a store-relative `target`, if
422    /// any. The **single** frozen-page matcher every write surface must funnel
423    /// through so the policy is enforced identically on `write` / `fm set` /
424    /// `fm init` / `link` / `rename` / `format`.
425    ///
426    /// Comparison is normalized so a policy line and a write target match
427    /// regardless of incidental spelling differences:
428    /// - `/` path separators on every OS,
429    /// - a single leading `./` dropped,
430    /// - a trailing `.md` dropped on **both** sides — `parse_db_md` stores
431    ///   frozen entries verbatim, so an operator who writes the natural
432    ///   extensionless spelling (`records/decisions/q1`) must protect the file
433    ///   (`records/decisions/q1.md`) exactly as the `.md` spelling does.
434    ///
435    /// Returns the matched config entry verbatim (its original spelling) so the
436    /// caller can name it in the `POLICY_FROZEN_PAGE` refusal.
437    pub fn frozen_match(&self, target: &Path) -> Option<PathBuf> {
438        let want = normalize_frozen_path(target);
439        self.frozen_pages
440            .iter()
441            .find(|frozen| normalize_frozen_path(frozen) == want)
442            .cloned()
443    }
444
445    /// True if `target` (store-relative) is a frozen page. Convenience wrapper
446    /// over [`Config::frozen_match`] for callers that only need presence.
447    pub fn is_frozen(&self, target: &Path) -> bool {
448        self.frozen_match(target).is_some()
449    }
450}
451
/// Canonical spelling of a path for frozen-page comparison.
///
/// The path's components are joined with `/` (components that are not valid
/// UTF-8 are skipped), then one leading `./` and one trailing `.md` are
/// removed. Policy entries and write targets both go through this before
/// being compared, which makes the match insensitive to separator style, a
/// leading `./`, and the `.md` extension.
fn normalize_frozen_path(p: &Path) -> String {
    let mut joined = String::new();
    let mut first = true;
    for part in p.components().filter_map(|c| c.as_os_str().to_str()) {
        if !first {
            joined.push('/');
        }
        joined.push_str(part);
        first = false;
    }

    let mut trimmed = joined.as_str();
    if let Some(rest) = trimmed.strip_prefix("./") {
        trimmed = rest;
    }
    if let Some(rest) = trimmed.strip_suffix(".md") {
        trimmed = rest;
    }
    trimmed.to_owned()
}
465
466/// A custom (or canonical-override) type schema parsed from a `DB.md`
467/// `### <type>` sub-section.
468#[derive(Debug, Clone, Default, PartialEq)]
469pub struct Schema {
470    /// One [`FieldSpec`] per bulleted field line, in source order.
471    pub fields: Vec<FieldSpec>,
472}
473
474/// One field declaration inside a [`Schema`]: `- <name> (<modifiers>)`.
475///
476/// Modifiers are comma-separated inside the parens; this captures the
477/// recognized ones as typed fields and stashes anything unrecognized in
478/// [`unknown_modifiers`](FieldSpec::unknown_modifiers) (surfaced as `Info`).
479#[derive(Debug, Clone, Default, PartialEq)]
480pub struct FieldSpec {
481    /// The field name.
482    pub name: String,
483    /// `required` modifier present.
484    pub required: bool,
485    /// The shape modifier (`string`/`int`/`bool`/`date`/`email`/`currency`/
486    /// `url`), if any.
487    pub shape: Option<Shape>,
488    /// `link to <prefix>/` — the store-relative prefix a wiki-link target must
489    /// start with. The trailing slash is required in the source syntax.
490    pub link_prefix: Option<PathBuf>,
491    /// `default <value>` — the value written when the field is absent.
492    pub default: Option<Value>,
493    /// `enum: <v1>, <v2>, ...` — the allowed values (must be the last modifier
494    /// on the line because of its own commas).
495    pub enum_values: Option<Vec<String>>,
496    /// Any modifiers not in the recognized vocabulary, preserved verbatim;
497    /// validate surfaces these as `Info`, never errors.
498    pub unknown_modifiers: Vec<String>,
499}
500
/// The shape modifiers a schema field may carry. Validate checks the field's
/// value against the chosen shape and reports `SCHEMA_SHAPE_MISMATCH` when it
/// does not fit.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Shape {
    /// A scalar string of any content.
    String,
    /// An integer.
    Int,
    /// A boolean.
    Bool,
    /// A date in RFC3339 / ISO-8601 form.
    Date,
    /// An email address of the form `<local>@<domain>`.
    Email,
    /// An amount of currency.
    Currency,
    /// A URL.
    Url,
}
520
/// A raw file separated into its frontmatter block and its body.
///
/// `body` is whatever follows the closing `---` fence, untouched; the writer
/// reproduces it byte-for-byte so operator edits are never reflowed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedFile {
    /// Raw frontmatter YAML: the text between the two fences, fences excluded.
    pub frontmatter_yaml: String,
    /// Verbatim body: everything after the closing `---` line.
    pub body: String,
}
532
533/// Split a file's full text into its frontmatter block and body. The
534/// frontmatter block must be the very first thing in the file, delimited by
535/// `---` on its own line at start and end. Returns
536/// [`ParseError::MissingFrontmatter`] if absent.
537pub fn split_frontmatter(text: &str, file: &Path) -> Result<ParsedFile, ParseError> {
538    // The opening fence must be the very first line: `---` (optionally with a
539    // trailing CR), no leading whitespace, nothing before it.
540    let mut lines = text.split_inclusive('\n');
541    let first = lines.next().unwrap_or("");
542    if first.trim_end_matches(['\r', '\n']) != "---" {
543        return Err(ParseError::MissingFrontmatter {
544            file: file.to_path_buf(),
545        });
546    }
547
548    // Scan for the closing fence line. Track byte offsets so we can slice the
549    // YAML (between fences, exclusive) and the body (verbatim, after the
550    // closing fence's line terminator).
551    let opening_len = first.len();
552    let mut offset = opening_len;
553    for line in lines {
554        if line.trim_end_matches(['\r', '\n']) == "---" {
555            let yaml = &text[opening_len..offset];
556            let body_start = offset + line.len();
557            let body = &text[body_start..];
558            return Ok(ParsedFile {
559                frontmatter_yaml: yaml.to_string(),
560                body: body.to_string(),
561            });
562        }
563        offset += line.len();
564    }
565
566    // Opening fence present but no closing fence: malformed frontmatter block.
567    Err(ParseError::MissingFrontmatter {
568        file: file.to_path_buf(),
569    })
570}
571
572/// Read a file from disk and parse it into typed [`Frontmatter`] plus the
573/// verbatim body string.
574pub fn read_file(path: &Path) -> Result<(Frontmatter, String), ParseError> {
575    let text = std::fs::read_to_string(path)?;
576    let parsed = split_frontmatter(&text, path)?;
577    let fm = Frontmatter::parse(&parsed.frontmatter_yaml, path)?;
578    Ok((fm, parsed.body))
579}
580
581/// Atomically write a markdown file from frontmatter + body: emit the
582/// frontmatter in canonical key order, then the body verbatim, via a
583/// temp-file-rename so a reader never sees a half-written file. Preserves the
584/// operator-edited body exactly as given.
585pub fn write_file(path: &Path, frontmatter: &Frontmatter, body: &str) -> Result<(), ParseError> {
586    use std::io::Write;
587
588    let yaml = frontmatter.to_yaml();
589    // `to_yaml` already terminates each block with a newline. Compose the file
590    // as: opening fence, frontmatter YAML, closing fence, then body verbatim.
591    let mut contents = String::with_capacity(yaml.len() + body.len() + 8);
592    contents.push_str("---\n");
593    contents.push_str(&yaml);
594    contents.push_str("---\n");
595    contents.push_str(body);
596
597    // Atomic write: write to a sibling temp file in the same directory, then
598    // rename over the target. Same-dir rename is atomic on a single
599    // filesystem, so a concurrent reader never sees a half-written file.
600    let parent = path.parent().unwrap_or_else(|| Path::new("."));
601    std::fs::create_dir_all(parent)?;
602    let file_name = path
603        .file_name()
604        .and_then(|n| n.to_str())
605        .unwrap_or("dbmd-write");
606    let tmp = parent.join(format!(".{file_name}.tmp.{}", std::process::id()));
607
608    // Scope the handle so it is flushed and closed before the rename.
609    {
610        let mut f = std::fs::File::create(&tmp)?;
611        f.write_all(contents.as_bytes())?;
612        f.sync_all()?;
613    }
614    // On failure, clean up the temp file rather than leaking it.
615    if let Err(e) = std::fs::rename(&tmp, path) {
616        let _ = std::fs::remove_file(&tmp);
617        return Err(ParseError::Io(e));
618    }
619    Ok(())
620}
621
622/// Extract every wiki-link from a body (and inline frontmatter), returning the
623/// structured [`WikiLink`] stream with short-form / `.md`-extension flags and
624/// `(file, line, col)` locations set.
625pub fn extract_wiki_links(body: &str, file: &Path) -> Vec<WikiLink> {
626    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
627    let re = RE.get_or_init(|| {
628        // [[target]] or [[target|display]]; target/display exclude brackets and
629        // (for target) the `|` separator so nested forms don't over-match.
630        regex::Regex::new(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]*))?\]\]").expect("valid wiki-link regex")
631    });
632
633    let mut out = Vec::new();
634    for (line_idx, line) in body.lines().enumerate() {
635        for caps in re.captures_iter(line) {
636            let whole = caps.get(0).expect("group 0 always present");
637            let target = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
638            let display = caps.get(2).map(|m| m.as_str().to_string());
639            out.push(WikiLink {
640                is_full_path: target_is_full_path(&target),
641                has_md_extension: target_has_md_extension(&target),
642                target,
643                display,
644                location: (
645                    file.to_path_buf(),
646                    (line_idx as u32) + 1,
647                    char_column(line, whole.start()),
648                ),
649            });
650        }
651    }
652    out
653}
654
655/// Extract every standard markdown link `[text](url)` from a body into a
656/// separate stream, kept distinct from wiki-links.
657pub fn extract_markdown_links(body: &str, file: &Path) -> Vec<MarkdownLink> {
658    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
659    let re = RE.get_or_init(|| {
660        // [text](url). `text` excludes brackets so a wiki-link `[[x]]` (which
661        // has `]]`, not `](`) never matches; `url` excludes `)` and whitespace.
662        regex::Regex::new(r"\[([^\[\]]*)\]\(([^)\s]*)\)").expect("valid markdown-link regex")
663    });
664
665    let mut out = Vec::new();
666    for (line_idx, line) in body.lines().enumerate() {
667        for caps in re.captures_iter(line) {
668            let whole = caps.get(0).expect("group 0 always present");
669            out.push(MarkdownLink {
670                text: caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(),
671                url: caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(),
672                location: (
673                    file.to_path_buf(),
674                    (line_idx as u32) + 1,
675                    char_column(line, whole.start()),
676                ),
677            });
678        }
679    }
680    out
681}
682
683/// Detect the frontmatter wiki-link-list mis-encoding: a wiki-link *list*
684/// written so YAML parses it as nested sequences instead of a clean list of
685/// strings. Returns the offending keys so validate can emit
686/// `WIKI_LINK_FLOW_FORM_LIST`.
687///
688/// The subtlety is that `[[x]]` is YAML for "a list containing `[x]`", so the
689/// shapes nest:
690///
691/// - **Scalar inline** `company: [[records/x]]` → `Seq[ Seq[String] ]`
692///   (double-nested). This is the spec's scalar wiki-link form — NOT flagged.
693/// - **Flow list** `attendees: [[[a]], [[b]]]` → `Seq[ Seq[Seq[String]], … ]`
694///   (triple-nested). The list mis-encoding — flagged.
695/// - **Unquoted block list** (`- [[a]]` per line) → also triple-nested, so it
696///   is flagged too; the canonical list form must quote each item
697///   (`- "[[a]]"`), which parses to a clean `Seq[String, …]` and is NOT flagged.
698///
699/// So the discriminator is nesting depth: a *list* mis-encoding has at least one
700/// item that is itself a sequence-of-sequences, whereas a scalar inline link's
701/// single item is a sequence-of-scalars.
702pub fn detect_flow_form_link_lists(frontmatter_yaml: &str) -> Vec<String> {
703    let value: Value = match serde_yml::from_str(frontmatter_yaml) {
704        Ok(v) => v,
705        // Malformed YAML is FM_MALFORMED_YAML's job, not ours; report nothing.
706        Err(_) => return Vec::new(),
707    };
708    let Value::Mapping(map) = value else {
709        return Vec::new();
710    };
711
712    let mut out = Vec::new();
713    for (k, v) in &map {
714        if let Value::Sequence(items) = v {
715            // Triple-nesting: some outer item is a sequence that itself holds a
716            // sequence. Scalar inline `[[x]]` is only double-nested, so it
717            // never matches.
718            let is_link_list = items.iter().any(|item| match item {
719                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
720                _ => false,
721            });
722            if is_link_list {
723                if let Some(key) = k.as_str() {
724                    out.push(key.to_string());
725                }
726            }
727        }
728    }
729    out
730}
731
732/// Extract the `##`/`###` sections of a markdown body into a flat list with
733/// body slices.
734pub fn extract_sections(body: &str) -> Vec<Section> {
735    // Keep each line's start so we can slice the body verbatim (exact newlines).
736    let lines: Vec<&str> = body.split_inclusive('\n').collect();
737
738    // First pass: classify heading levels (0 = not a heading), honoring fenced
739    // code blocks so a `## x` inside a ``` fence is not treated as a heading.
740    let mut levels: Vec<u8> = Vec::with_capacity(lines.len());
741    let mut fence: Option<(u8, usize)> = None;
742    for line in &lines {
743        let content = line.trim_end_matches(['\n', '\r']);
744        if let Some(f) = fence {
745            if is_closing_fence(content, f) {
746                fence = None;
747            }
748            levels.push(0);
749            continue;
750        }
751        if let Some(opened) = opening_fence(content) {
752            fence = Some(opened);
753            levels.push(0);
754            continue;
755        }
756        levels.push(heading_level(content));
757    }
758
759    // Second pass: emit `##`+ headings; each section body runs from its heading
760    // line to the next heading at an equal-or-shallower level (exclusive).
761    let mut sections = Vec::new();
762    for (i, &lvl) in levels.iter().enumerate() {
763        if lvl < 2 {
764            continue;
765        }
766        let heading_line = lines[i].trim_end_matches(['\n', '\r']);
767        let heading = heading_text(heading_line, lvl);
768
769        let mut end = lines.len();
770        for (j, &other) in levels.iter().enumerate().skip(i + 1) {
771            if other != 0 && other <= lvl {
772                end = j;
773                break;
774            }
775        }
776
777        sections.push(Section {
778            heading,
779            level: lvl,
780            line: (i + 1) as u32,
781            body: lines[i..end].concat(),
782        });
783    }
784    sections
785}
786
787/// Parse a store's `DB.md` file into a [`Config`]: the `## Agent instructions`
788/// prose, `## Policies` (`### Frozen pages` + `### Ignored types`), and
789/// `## Schemas` (`### <type>` field-bullet blocks). Unrecognized sections are
790/// ignored; absent sections leave their [`Config`] fields at default.
791pub fn parse_db_md(text: &str, file: &Path) -> Result<Config, ParseError> {
792    // The structured sections live in the body (after frontmatter). DB.md must
793    // still start with a valid `---` block (`type: db-md`); if it's missing we
794    // surface MissingFrontmatter like any other file.
795    let parsed = split_frontmatter(text, file)?;
796    let sections = extract_sections(&parsed.body);
797
798    let mut config = Config::default();
799    // Track which H2 region each H3 belongs to as we walk the flat list.
800    let mut current_h2: Option<String> = None;
801
802    for section in &sections {
803        match section.level {
804            2 => {
805                let name = section.heading.trim().to_ascii_lowercase();
806                current_h2 = Some(name.clone());
807                if name == "agent instructions" {
808                    let prose = section_prose(&section.body);
809                    if !prose.is_empty() {
810                        config.agent_instructions = Some(prose);
811                    }
812                }
813            }
814            3 => {
815                let h2 = current_h2.as_deref().unwrap_or("");
816                let h3 = section.heading.trim().to_ascii_lowercase();
817                match (h2, h3.as_str()) {
818                    ("policies", "frozen pages") => {
819                        config.frozen_pages = bullet_lines(&section.body)
820                            .into_iter()
821                            .map(|b| PathBuf::from(extract_path_bullet(&b)))
822                            .collect();
823                    }
824                    ("policies", "ignored types") => {
825                        config.ignored_types = bullet_lines(&section.body)
826                            .into_iter()
827                            .flat_map(|b| extract_type_list_bullet(&b))
828                            .collect();
829                    }
830                    ("schemas", _) => {
831                        // The H3 heading text (as written) is the type name.
832                        let type_name = section.heading.trim().to_string();
833                        let fields: Vec<FieldSpec> = bullet_lines(&section.body)
834                            .into_iter()
835                            .map(|b| parse_field_spec(&b))
836                            .collect();
837                        config.schemas.insert(type_name, Schema { fields });
838                    }
839                    _ => {}
840                }
841            }
842            _ => {}
843        }
844    }
845
846    Ok(config)
847}
848
849/// Parse a single `## Schemas` field-bullet line — `- <name> (<modifiers>)` —
850/// into a [`FieldSpec`], capturing recognized modifiers and stashing the rest
851/// in [`FieldSpec::unknown_modifiers`].
852pub fn parse_field_spec(bullet_line: &str) -> FieldSpec {
853    // Strip the leading bullet marker (`- ` / `* ` / `+ `) and surrounding ws.
854    let line = bullet_line.trim();
855    let line = line
856        .strip_prefix("- ")
857        .or_else(|| line.strip_prefix("* "))
858        .or_else(|| line.strip_prefix("+ "))
859        .or_else(|| line.strip_prefix('-'))
860        .unwrap_or(line)
861        .trim();
862
863    // Split `<name> (<modifiers>)`. A bullet without parens is a free-form
864    // optional field of any shape — name only, no modifiers.
865    let (name, modifiers) = match line.find('(') {
866        Some(open) => {
867            let name = line[..open].trim().to_string();
868            let after = &line[open + 1..];
869            let mods = match after.rfind(')') {
870                Some(close) => &after[..close],
871                None => after, // tolerate a missing close paren
872            };
873            (name, mods.trim())
874        }
875        None => (line.to_string(), ""),
876    };
877
878    let mut spec = FieldSpec {
879        name,
880        ..FieldSpec::default()
881    };
882
883    if modifiers.is_empty() {
884        return spec;
885    }
886
887    // Modifiers are comma-separated. `enum:` is special: because its own value
888    // list contains commas, it must be last and swallows the remainder.
889    let raw: Vec<&str> = modifiers.split(',').collect();
890    let mut i = 0;
891    while i < raw.len() {
892        let token = raw[i].trim();
893        if token.is_empty() {
894            i += 1;
895            continue;
896        }
897        let lower = token.to_ascii_lowercase();
898
899        if lower == "required" {
900            spec.required = true;
901        } else if let Some(shape) = shape_from_str(&lower) {
902            spec.shape = Some(shape);
903        } else if let Some(rest) = lower.strip_prefix("link to ") {
904            // The trailing slash is required in the source; store the prefix
905            // without it so `Path::starts_with` comparisons are clean.
906            let prefix = token["link to ".len()..].trim().trim_end_matches('/');
907            let _ = rest; // lowercase form only used for the keyword match
908            spec.link_prefix = Some(PathBuf::from(prefix));
909        } else if let Some(_rest) = lower.strip_prefix("default ") {
910            // Value is everything after the keyword on this comma-token,
911            // preserving original case.
912            let value = token["default ".len()..].trim().to_string();
913            spec.default = Some(Value::String(value));
914        } else if lower.starts_with("enum:") || lower == "enum" {
915            // Rejoin this token and every remaining token to recover the full
916            // comma-separated value list.
917            let mut joined = raw[i..].join(",");
918            // Drop the leading `enum:` keyword (case-insensitive).
919            if let Some(colon) = joined.find(':') {
920                joined = joined[colon + 1..].to_string();
921            }
922            let values: Vec<String> = joined
923                .split(',')
924                .map(|v| v.trim().to_string())
925                .filter(|v| !v.is_empty())
926                .collect();
927            spec.enum_values = Some(values);
928            break; // enum consumed the rest of the line
929        } else {
930            // Unrecognized modifier — captured verbatim, surfaced as Info.
931            spec.unknown_modifiers.push(token.to_string());
932        }
933        i += 1;
934    }
935
936    spec
937}
938
939// ── Private helpers ─────────────────────────────────────────────────────────
940
941/// Parse a frontmatter timestamp value into a `DateTime<FixedOffset>`. A `null`
942/// is treated as absent; anything else must be an RFC3339 string.
943fn parse_timestamp(
944    value: &Value,
945    key: &str,
946    file: &Path,
947) -> Result<Option<DateTime<FixedOffset>>, ParseError> {
948    match value {
949        Value::Null => Ok(None),
950        Value::String(s) => parse_rfc3339(s, key, file).map(Some),
951        other => Err(ParseError::BadTimestamp {
952            file: file.to_path_buf(),
953            key: key.to_string(),
954            value: format!("{other:?}"),
955        }),
956    }
957}
958
959/// Parse an RFC3339 timestamp string, mapping failure to [`ParseError::BadTimestamp`].
960fn parse_rfc3339(s: &str, key: &str, file: &Path) -> Result<DateTime<FixedOffset>, ParseError> {
961    DateTime::parse_from_rfc3339(s.trim()).map_err(|_| ParseError::BadTimestamp {
962        file: file.to_path_buf(),
963        key: key.to_string(),
964        value: s.to_string(),
965    })
966}
967
968/// Read a `tags` value into a flat `Vec<String>`. Accepts a sequence of scalars
969/// (the canonical form) or a single scalar (coerced to a one-element list).
970fn parse_tags(value: &Value) -> Vec<String> {
971    match value {
972        Value::Sequence(items) => items
973            .iter()
974            .filter_map(|v| match v {
975                Value::String(s) => Some(s.clone()),
976                Value::Number(n) => Some(n.to_string()),
977                Value::Bool(b) => Some(b.to_string()),
978                _ => None,
979            })
980            .collect(),
981        Value::String(s) => vec![s.clone()],
982        _ => Vec::new(),
983    }
984}
985
986/// Parse a single `[[target|display]]` string into a [`WikiLink`] with no
987/// location, or `None` if the string is not a bare wiki-link. Used for
988/// frontmatter-valued links where there is no body position to report.
989fn parse_wiki_link_str(s: &str) -> Option<WikiLink> {
990    let s = s.trim();
991    let inner = s.strip_prefix("[[")?.strip_suffix("]]")?;
992    // Reject anything with further brackets (e.g. the nested flow-form item),
993    // which is not a clean single wiki-link.
994    if inner.contains('[') || inner.contains(']') {
995        return None;
996    }
997    let (target, display) = match inner.split_once('|') {
998        Some((t, d)) => (t.to_string(), Some(d.to_string())),
999        None => (inner.to_string(), None),
1000    };
1001    Some(WikiLink {
1002        is_full_path: target_is_full_path(&target),
1003        has_md_extension: target_has_md_extension(&target),
1004        target,
1005        display,
1006        location: (PathBuf::new(), 0, 0),
1007    })
1008}
1009
1010/// Extract every wiki-link from a single frontmatter field value, accepting the
1011/// two canonical forms the spec defines (SPEC § Linking):
1012///
1013/// - a **scalar** wiki-link field, in either the quoted (`f: "[[x]]"`) or the
1014///   canonical unquoted inline (`f: [[x]]`) form, and
1015/// - a **list** field whose items are quoted wiki-link strings
1016///   (`- "[[x]]"`).
1017///
1018/// YAML eats the brackets of an unquoted `[[x]]`, leaving a flow-list-in-a-list,
1019/// so the parsed [`Value`] shapes are not what one would naively expect:
1020///
1021/// | source                         | parsed `Value`                     | here |
1022/// |--------------------------------|------------------------------------|------|
1023/// | `f: "[[x]]"`       (quoted)    | `String("[[x]]")`                  | link |
1024/// | `f: [[x]]`         (unquoted)  | `Seq[ Seq[String("x")] ]`          | link |
1025/// | `f:`\n`  - "[[x]]"`(quoted)    | `Seq[ String("[[x]]"), … ]`        | link |
1026/// | `f:`\n`  - [[x]]`  (unquoted)  | `Seq[ Seq[Seq[String("x")]], … ]`  | —    |
1027///
1028/// The last row — an *unquoted list* — parses identically to the flow-form list
1029/// `f: [[a], [b]]` and is a mis-encoding the canonical writer never emits;
1030/// `dbmd validate` reports it as `WIKI_LINK_FLOW_FORM_LIST` (see
1031/// [`detect_flow_form_link_lists`]). It is deliberately NOT surfaced here, so an
1032/// edge enumerator only ever sees the valid canonical forms.
1033///
1034/// The unquoted scalar (`Seq[Seq[String]]`, one element) is told apart from a
1035/// plain one-item flow list (`f: [x]` → `Seq[String]`, one fewer nesting level)
1036/// by [`unquoted_inline_link`] requiring its argument to be a `Sequence`.
1037fn links_in_field_value(value: &Value) -> Vec<WikiLink> {
1038    // Quoted scalar: `field: "[[x]]"`.
1039    if let Value::String(s) = value {
1040        return parse_wiki_link_str(s).into_iter().collect();
1041    }
1042    let Value::Sequence(items) = value else {
1043        return Vec::new();
1044    };
1045    // Unquoted scalar inline form `field: [[x]]` → `Seq[ Seq[String(x)] ]`.
1046    // (A quoted single-item list `["[[x]]"]` is `Seq[String]`, so its lone item
1047    // is a `String`, not a `Sequence`, and falls through to the list path below.)
1048    if items.len() == 1 {
1049        if let Some(link) = unquoted_inline_link(&items[0]) {
1050            return vec![link];
1051        }
1052    }
1053    // Otherwise a list of quoted wiki-link strings; non-string items (the
1054    // unquoted-list mis-encoding) are left for validate to flag.
1055    items
1056        .iter()
1057        .filter_map(|item| parse_wiki_link_str(item.as_str()?))
1058        .collect()
1059}
1060
1061/// Canonicalize one `extra` frontmatter value for emission by [`Frontmatter::to_yaml`].
1062///
1063/// The read path ([`Frontmatter::parse`]) stores every unknown key's raw parsed
1064/// [`Value`] verbatim, so a SPEC-canonical *unquoted* inline scalar wiki-link
1065/// (`company: [[records/companies/northstar]]`) lands in `extra` as the nested
1066/// shape YAML produces for it — `Seq[ Seq[String("records/companies/northstar")] ]`.
1067/// Re-emitting that verbatim yields the block sequence
1068///
1069/// ```text
1070/// company:
1071/// - - records/companies/northstar
1072/// ```
1073///
1074/// which has lost the `[[ ]]` brackets entirely: the link is destroyed, and every
1075/// reader (validate, graph, backlinks) stops seeing the edge. This normalizes such
1076/// a value back into the canonical emitted form before it is written:
1077///
1078/// - a **scalar** wiki-link (quoted `String("[[x]]")` or unquoted `Seq[Seq[String]]`,
1079///   one element) → a quoted scalar `Value::String("[[x]]")`, which serde_yml emits
1080///   inline as `'[[x]]'` — the form the finding confirms survives a round-trip and
1081///   that [`links_in_field_value`] reads back as the same scalar link;
1082/// - a **list** of wiki-links (in any spelling [`links_in_field_value`] accepts) →
1083///   a block `Value::Sequence` of quoted-link strings (`- "[[x]]"`), matching the
1084///   `set` write-in path and the canonical list form;
1085/// - everything else → returned verbatim (the common no-op for non-link values).
1086///
1087/// `|display` is preserved in both link branches. This is the single point that
1088/// keeps all three curator-loop writers (`format`, `fm set`, `link`) from
1089/// corrupting a pre-existing canonical link, since they all funnel through
1090/// `to_yaml`.
1091fn canonicalize_extra_value(value: &Value) -> Value {
1092    match value {
1093        // Scalar wiki-link, quoted form: `field: "[[x]]"` → `String("[[x]]")`.
1094        // Re-emit as a quoted scalar so it stays a string (never the brackets-as-
1095        // YAML nested sequence). Non-link strings are returned untouched.
1096        Value::String(s) => match parse_wiki_link_str(s) {
1097            Some(link) => Value::String(wiki_link_literal(&link)),
1098            None => value.clone(),
1099        },
1100        Value::Sequence(items) => {
1101            // Scalar wiki-link, unquoted inline form: `field: [[x]]` parses to a
1102            // one-element `Seq[ Seq[String(x)] ]`. Collapse back to the quoted
1103            // scalar string so the link is preserved rather than block-emitted.
1104            if items.len() == 1 {
1105                if let Some(link) = unquoted_inline_link(&items[0]) {
1106                    return Value::String(wiki_link_literal(&link));
1107                }
1108            }
1109            // List of wiki-links: re-emit as a block sequence of quoted-link
1110            // strings, the canonical list form `to_yaml` renders block-style and
1111            // `links_in_field_value` accepts. Only canonicalize when *every* item
1112            // is a clean single wiki-link; a list with any non-link item is left
1113            // verbatim so unrelated sequences (and the unquoted-list mis-encoding
1114            // validate flags) are untouched.
1115            let mut links = Vec::with_capacity(items.len());
1116            for item in items {
1117                match link_from_flow_list_item(item) {
1118                    Some(link) => links.push(link),
1119                    None => return value.clone(),
1120                }
1121            }
1122            if links.is_empty() {
1123                return value.clone();
1124            }
1125            Value::Sequence(
1126                links
1127                    .iter()
1128                    .map(|l| Value::String(wiki_link_literal(l)))
1129                    .collect(),
1130            )
1131        }
1132        // Mappings, scalars other than strings, nulls: nothing to canonicalize.
1133        _ => value.clone(),
1134    }
1135}
1136
1137/// Render a [`WikiLink`] back to its `[[target]]` / `[[target|display]]` literal,
1138/// the inner form the canonical writer emits and `links_in_field_value` accepts.
1139fn wiki_link_literal(link: &WikiLink) -> String {
1140    match &link.display {
1141        Some(d) => format!("[[{}|{}]]", link.target, d),
1142        None => format!("[[{}]]", link.target),
1143    }
1144}
1145
1146/// Recognize the inner token of an unquoted scalar `[[x]]`: after YAML strips the
1147/// outer brackets, the inner `[x]` is a single-element sequence `Seq[String(x)]`.
1148/// Reconstructs `[[x]]` (preserving any `|display`) and parses it, or returns
1149/// `None` when `v` is not that shape. Requiring a `Sequence` here is what keeps a
1150/// plain one-item flow list (`field: [x]` → `Seq[String]`, not `Seq[Seq[String]]`)
1151/// from being mistaken for a wiki-link.
1152fn unquoted_inline_link(v: &Value) -> Option<WikiLink> {
1153    let Value::Sequence(items) = v else {
1154        return None;
1155    };
1156    if items.len() != 1 {
1157        return None;
1158    }
1159    let s = items[0].as_str()?;
1160    // A clean unquoted wiki-link has no further brackets inside it.
1161    if s.contains('[') || s.contains(']') {
1162        return None;
1163    }
1164    parse_wiki_link_str(&format!("[[{s}]]"))
1165}
1166
1167/// Decide whether a `dbmd fm set` / `--fm` value string is a **list of
1168/// wiki-links** that should be stored as a YAML block sequence, returning the
1169/// canonical `Value::Sequence` of quoted-link strings when so.
1170///
1171/// The value path of every write surface stringifies its argument; without this
1172/// a required list-of-links field (`meeting.attendees`) was unwritable in valid
1173/// form — passing `[[[a]], [[b]]]` stored a single scalar string that mis-parses
1174/// and trips `WIKI_LINK_FLOW_FORM_LIST` / `WIKI_LINK_BROKEN`. This recognizes the
1175/// two list spellings an agent naturally types and normalizes both to the block
1176/// form the canonical writer emits and `dbmd validate` accepts:
1177///
1178/// - flow list of quoted links — `["[[a]]", "[[b]]"]`
1179/// - flow list of unquoted links — `[[[a]], [[b]]]` (YAML: `Seq[Seq[String], …]`)
1180///
1181/// Returns `None` (⇒ caller stores a verbatim scalar string) for everything that
1182/// is not unambiguously a list of clean wiki-links — plain text, a single inline
1183/// `[[x]]` (YAML reads it as a one-item `Seq[Seq[String]]`, kept scalar so it
1184/// renders inline), an empty list, or a list with any non-link item. A single
1185/// link must stay scalar; only genuine multi-item-or-explicit lists become
1186/// sequences, matching `links_in_field_value`'s acceptance rule so writer and
1187/// validator never disagree.
1188fn parse_link_list_value(value: &str) -> Option<Value> {
1189    let trimmed = value.trim();
1190    // Only a YAML *flow sequence* literal is a list candidate; anything not
1191    // wrapped in `[ … ]` is a scalar (a bare `[[x]]` is wrapped, and handled by
1192    // the single-inline-link guard below).
1193    if !(trimmed.starts_with('[') && trimmed.ends_with(']')) {
1194        return None;
1195    }
1196    let Ok(Value::Sequence(items)) = serde_yml::from_str::<Value>(trimmed) else {
1197        return None;
1198    };
1199    // A single inline `[[x]]` parses to `Seq[ Seq[String(x)] ]` (one item, itself
1200    // a sequence) — that is the unquoted *scalar* form, not a list. Keep it scalar
1201    // so it round-trips to the inline `field: [[x]]` rather than a one-item block
1202    // list. `links_in_field_value` reads it back as a scalar link either way.
1203    if items.len() == 1 && unquoted_inline_link(&items[0]).is_some() {
1204        return None;
1205    }
1206    // Every item must resolve to exactly one clean wiki-link, in any of the flow
1207    // spellings an agent types (see [`link_from_flow_list_item`]).
1208    let mut links = Vec::with_capacity(items.len());
1209    for item in &items {
1210        links.push(link_from_flow_list_item(item)?);
1211    }
1212    if links.is_empty() {
1213        return None;
1214    }
1215    // Normalize to a block sequence of quoted-link strings — the form `to_yaml`
1216    // renders block-style and `links_in_field_value` accepts. `|display` is
1217    // preserved.
1218    let normalized = links
1219        .iter()
1220        .map(|l| Value::String(wiki_link_literal(l)))
1221        .collect();
1222    Some(Value::Sequence(normalized))
1223}
1224
1225/// Recognize one clean wiki-link from a single **item** of a YAML flow sequence,
1226/// across the spellings an agent types for a list. After top-level flow parsing,
1227/// a list item arrives in one of:
1228///
1229/// - quoted — `"[[x]]"` ⇒ `String("[[x]]")`
1230/// - unquoted in a flow list — `[[x]]` inside `[…]` ⇒ `Seq[ Seq[String(x)] ]`
1231///   (one level deeper than a bare unquoted scalar, because the surrounding list
1232///   adds a wrapper); unwrap the single-element wrapper, then read the inline
1233///   `Seq[String(x)]` with [`unquoted_inline_link`].
1234///
1235/// Returns `None` for any item that is not exactly one clean wiki-link, so the
1236/// caller falls back to a scalar string and never fabricates a partial list.
1237fn link_from_flow_list_item(item: &Value) -> Option<WikiLink> {
1238    match item {
1239        Value::String(s) => parse_wiki_link_str(s),
1240        Value::Sequence(inner) => {
1241            // Unquoted list item `[[x]]` → `Seq[ Seq[String(x)] ]`: peel the lone
1242            // wrapper to expose the inline-link shape.
1243            if inner.len() == 1 {
1244                if let Some(link) = unquoted_inline_link(&inner[0]) {
1245                    return Some(link);
1246                }
1247            }
1248            // Defensive: also accept the inline-link shape directly.
1249            unquoted_inline_link(item)
1250        }
1251        _ => None,
1252    }
1253}
1254
1255/// A target is a full store-relative path when its first path segment is one of
1256/// the three canonical layer dirs and at least one `/` separator follows. A
1257/// trailing `.md` does not affect this classification.
1258fn target_is_full_path(target: &str) -> bool {
1259    let target = target.trim();
1260    match target.split_once('/') {
1261        Some((head, _rest)) => LAYER_DIRS.contains(&head),
1262        None => false,
1263    }
1264}
1265
/// Whether the trimmed target ends in `.md` (the condition validate warns
/// about as `WIKI_LINK_HAS_EXTENSION`). The comparison is case-sensitive.
fn target_has_md_extension(target: &str) -> bool {
    target.trim().strip_suffix(".md").is_some()
}
1271
/// Convert a byte offset within `line` into a 1-based column counted in
/// characters (Unicode scalar values) rather than bytes.
///
/// `byte_offset` must lie on a character boundary of `line`; slicing panics
/// otherwise.
fn char_column(line: &str, byte_offset: usize) -> u32 {
    let preceding = &line[..byte_offset];
    let chars_before = preceding.chars().count() as u32;
    chars_before + 1
}
1276
1277/// Map a lowercase shape keyword to its [`Shape`].
1278fn shape_from_str(s: &str) -> Option<Shape> {
1279    match s {
1280        "string" => Some(Shape::String),
1281        "int" => Some(Shape::Int),
1282        "bool" => Some(Shape::Bool),
1283        "date" => Some(Shape::Date),
1284        "email" => Some(Shape::Email),
1285        "currency" => Some(Shape::Currency),
1286        "url" => Some(Shape::Url),
1287        _ => None,
1288    }
1289}
1290
/// ATX heading level of `line`: the length of its leading `#` run, or 0 when
/// the line is not a heading.
///
/// A heading may be indented by at most three spaces, its `#` run is one to
/// six characters long, and the run must be followed by a space, a tab, or
/// the end of the line.
fn heading_level(line: &str) -> u8 {
    let indent = line.bytes().take_while(|&b| b == b' ').count();
    if indent > 3 {
        return 0;
    }
    let rest = &line[indent..];
    let hashes = rest.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&hashes) {
        return 0;
    }
    // Look at the byte right after the `#` run, if there is one.
    match rest.as_bytes().get(hashes) {
        None | Some(b' ') | Some(b'\t') => hashes as u8,
        Some(_) => 0,
    }
}
1311
/// The text of an ATX heading line: everything after the leading `#` run,
/// trimmed, with an optional ATX closing sequence removed
/// (`## Title ##` → `Title`).
///
/// A trailing `#` run counts as a closing sequence only when it is preceded by
/// a space or tab, or when it is the whole content (`## ##` → empty text), as
/// CommonMark specifies. A `#` attached to the last word is part of the text,
/// so `## C#` yields `C#`.
///
/// `level` must be the value [`heading_level`] returned for `line`; it is used
/// as the length of the leading `#` run.
fn heading_text(line: &str, level: u8) -> String {
    let indent = line.len() - line.trim_start_matches(' ').len();
    let content = line[indent + usize::from(level)..].trim();

    let stripped = content.trim_end_matches('#');
    let has_trailing_hashes = stripped.len() != content.len();
    // The closing run must stand apart from the text (or be all there is).
    let is_closing_sequence = stripped.is_empty() || stripped.ends_with([' ', '\t']);

    if has_trailing_hashes && is_closing_sequence {
        stripped.trim_end().to_string()
    } else {
        content.to_string()
    }
}
1325
/// Detect a line that opens a fenced code block, returning the fence
/// character (as a byte) and the length of its run.
///
/// The line may be indented by at most three spaces and must start with a run
/// of three or more backticks or tildes. For a backtick fence, the rest of the
/// line must not contain another backtick. Returns `None` otherwise.
fn opening_fence(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }
    let marker = match rest.as_bytes().first() {
        Some(&b) if b == b'`' || b == b'~' => b,
        _ => return None,
    };
    let run = rest.len() - rest.trim_start_matches(char::from(marker)).len();
    if run < 3 {
        return None;
    }
    // What follows the run is the info string; backtick fences forbid a
    // backtick in it.
    let info = &rest[run..];
    if marker == b'`' && info.contains('`') {
        return None;
    }
    Some((marker, run))
}
1347
/// Whether `line` closes the open fence described by `fence`
/// (`(fence byte, opening run length)`).
///
/// The line may be indented by at most three spaces, must start with a run of
/// the same fence character at least as long as the opening run, and may have
/// only whitespace after that run.
fn is_closing_fence(line: &str, fence: (u8, usize)) -> bool {
    let (marker, open_len) = fence;
    let rest = line.trim_start_matches(' ');
    let indent = line.len() - rest.len();
    let run = rest.len() - rest.trim_start_matches(char::from(marker)).len();
    indent <= 3 && run >= open_len && rest[run..].trim().is_empty()
}
1363
/// The prose of a section: the text following its first line (the heading),
/// with surrounding whitespace trimmed. A body consisting of the heading line
/// alone, with no newline, yields an empty string.
fn section_prose(section_body: &str) -> String {
    section_body
        .find('\n')
        .map(|newline| section_body[newline + 1..].trim().to_string())
        .unwrap_or_default()
}
1371
/// The bullet lines of a section body. The first line (the heading) is
/// skipped; each remaining line is trimmed on both sides and kept when it
/// starts with `- `, `* `, or `+ `. Lines are returned in order, still
/// carrying their bullet marker.
fn bullet_lines(section_body: &str) -> Vec<String> {
    const MARKERS: [&str; 3] = ["- ", "* ", "+ "];

    let mut bullets = Vec::new();
    for raw in section_body.lines().skip(1) {
        let line = raw.trim();
        if MARKERS.iter().any(|marker| line.starts_with(*marker)) {
            bullets.push(line.to_string());
        }
    }
    bullets
}
1383
/// Drop a trailing comment from a bullet's content. The content is cut at the
/// earliest occurrence of any comment separator — ` — ` (em dash), ` -- `, or
/// ` – ` (en dash) — and the remaining prefix is returned trimmed. Content
/// without a separator is returned trimmed in full.
fn strip_bullet_comment(content: &str) -> &str {
    let cut = [" — ", " -- ", " – "]
        .iter()
        .filter_map(|sep| content.find(*sep))
        .min()
        .unwrap_or(content.len());
    content[..cut].trim()
}
1395
/// The content of a bullet line: the line trimmed, with a leading `- `, `* `,
/// or `+ ` marker removed, and the remainder trimmed again. A line without
/// one of those markers is returned trimmed and otherwise unchanged.
fn bullet_content(bullet: &str) -> &str {
    let trimmed = bullet.trim();
    for marker in ["- ", "* ", "+ "] {
        if let Some(rest) = trimmed.strip_prefix(marker) {
            return rest.trim();
        }
    }
    trimmed
}
1405
1406/// Extract a store-relative path from a Frozen-pages bullet. The path may be
1407/// wrapped in backticks and followed by an em-dash comment.
1408fn extract_path_bullet(bullet: &str) -> String {
1409    let content = bullet_content(bullet);
1410    // Prefer a backtick-delimited span if present.
1411    if let Some(start) = content.find('`') {
1412        if let Some(end_rel) = content[start + 1..].find('`') {
1413            return content[start + 1..start + 1 + end_rel].trim().to_string();
1414        }
1415    }
1416    // Otherwise take the text up to a comment separator, stripping quotes.
1417    strip_bullet_comment(content)
1418        .trim_matches('"')
1419        .trim_matches('\'')
1420        .trim()
1421        .to_string()
1422}
1423
1424/// Extract a comma-separated type list from an Ignored-types bullet, stripping
1425/// backticks/quotes and any trailing em-dash comment.
1426fn extract_type_list_bullet(bullet: &str) -> Vec<String> {
1427    let content = strip_bullet_comment(bullet_content(bullet));
1428    content
1429        .split(',')
1430        .map(|t| {
1431            t.trim()
1432                .trim_matches('`')
1433                .trim_matches('"')
1434                .trim_matches('\'')
1435                .trim()
1436                .to_string()
1437        })
1438        .filter(|t| !t.is_empty())
1439        .collect()
1440}
1441
1442#[cfg(test)]
1443mod tests {
1444    use super::*;
1445    use std::path::Path;
1446    use tempfile::tempdir;
1447
1448    // ── Config::frozen_match (the single write-surface policy matcher) ───────
1449
1450    #[test]
1451    fn frozen_match_is_md_insensitive_both_directions() {
1452        // A policy entry stored WITHOUT `.md` (the natural extensionless
1453        // spelling `parse_db_md` keeps verbatim) must still match a `.md`
1454        // write target — the regression every write surface had.
1455        let cfg = Config {
1456            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1457            ..Config::default()
1458        };
1459        assert_eq!(
1460            cfg.frozen_match(Path::new("records/decisions/q1.md")),
1461            Some(PathBuf::from("records/decisions/q1")),
1462            "extensionless policy entry must freeze the .md file"
1463        );
1464        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1465
1466        // The symmetric case: a policy entry WITH `.md` matches a bare target.
1467        let cfg = Config {
1468            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1469            ..Config::default()
1470        };
1471        assert_eq!(
1472            cfg.frozen_match(Path::new("records/decisions/q1")),
1473            Some(PathBuf::from("records/decisions/q1.md")),
1474        );
1475        // And the same-spelling cases still match.
1476        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1477    }
1478
1479    #[test]
1480    fn frozen_match_drops_leading_dot_slash() {
1481        let cfg = Config {
1482            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1483            ..Config::default()
1484        };
1485        assert!(cfg.is_frozen(Path::new("./records/decisions/q1.md")));
1486        assert!(cfg.is_frozen(Path::new("./records/decisions/q1")));
1487    }
1488
1489    #[test]
1490    fn frozen_match_returns_none_for_unlisted_and_prefix_paths() {
1491        let cfg = Config {
1492            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1493            ..Config::default()
1494        };
1495        assert!(cfg
1496            .frozen_match(Path::new("records/decisions/q2.md"))
1497            .is_none());
1498        // A prefix is not a match: `q1` must not freeze `q1-draft`.
1499        assert!(cfg
1500            .frozen_match(Path::new("records/decisions/q1-draft.md"))
1501            .is_none());
1502        assert!(!cfg.is_frozen(Path::new("records/decisions/q11.md")));
1503    }
1504
1505    // ── split_frontmatter ───────────────────────────────────────────────────
1506
1507    #[test]
1508    fn split_frontmatter_separates_yaml_and_verbatim_body() {
1509        let text = "---\ntype: contact\nsummary: x\n---\n# Heading\n\nBody line.\n";
1510        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1511        assert_eq!(p.frontmatter_yaml, "type: contact\nsummary: x\n");
1512        // Body is everything after the closing fence's newline, byte-for-byte.
1513        assert_eq!(p.body, "# Heading\n\nBody line.\n");
1514    }
1515
1516    #[test]
1517    fn split_frontmatter_preserves_body_without_trailing_newline() {
1518        let text = "---\ntype: x\n---\nno trailing newline";
1519        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1520        assert_eq!(p.body, "no trailing newline");
1521    }
1522
1523    #[test]
1524    fn split_frontmatter_empty_body_when_nothing_after_fence() {
1525        let text = "---\ntype: x\n---\n";
1526        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1527        assert_eq!(p.body, "");
1528    }
1529
1530    #[test]
1531    fn split_frontmatter_missing_opening_fence_errors() {
1532        let text = "# No frontmatter here\ntype: x\n";
1533        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1534        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1535    }
1536
1537    #[test]
1538    fn split_frontmatter_leading_content_before_fence_rejected() {
1539        // The opening fence must be the very first line; a blank line first is
1540        // not allowed.
1541        let text = "\n---\ntype: x\n---\nbody";
1542        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1543        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1544    }
1545
1546    #[test]
1547    fn split_frontmatter_unterminated_block_errors() {
1548        let text = "---\ntype: x\nsummary: y\n";
1549        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1550        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1551    }
1552
1553    // ── Frontmatter::parse ───────────────────────────────────────────────────
1554
1555    #[test]
1556    fn parse_populates_typed_fields_and_routes_unknowns_to_extra() {
1557        let yaml = "type: contact\nid: sarah-chen\nsummary: Director of Ops\nstatus: active\ntags: [vip, renewal]\nemail: sarah@northstar.io\nrole: Director";
1558        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1559        assert_eq!(fm.type_.as_deref(), Some("contact"));
1560        assert_eq!(fm.id.as_deref(), Some("sarah-chen"));
1561        assert_eq!(fm.summary.as_deref(), Some("Director of Ops"));
1562        assert_eq!(fm.status.as_deref(), Some("active"));
1563        assert_eq!(fm.tags, vec!["vip".to_string(), "renewal".to_string()]);
1564        // Type-specific fields are NOT promoted to typed slots.
1565        assert!(fm.type_.is_some() && !fm.extra.contains_key("type"));
1566        assert!(!fm.extra.contains_key("tags"));
1567        assert_eq!(
1568            fm.extra.get("email").and_then(|v| v.as_str()),
1569            Some("sarah@northstar.io")
1570        );
1571        assert_eq!(
1572            fm.extra.get("role").and_then(|v| v.as_str()),
1573            Some("Director")
1574        );
1575    }
1576
1577    #[test]
1578    fn parse_reads_rfc3339_timestamps() {
1579        let yaml =
1580            "type: email\ncreated: 2026-05-27T08:00:00-07:00\nupdated: 2026-05-28T09:30:00-07:00";
1581        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1582        let created = fm.created.expect("created parsed");
1583        // -07:00 offset is 7 * 3600 seconds west.
1584        assert_eq!(created.offset().utc_minus_local(), 7 * 3600);
1585        assert_eq!(created.to_rfc3339(), "2026-05-27T08:00:00-07:00");
1586        assert!(fm.updated.is_some());
1587    }
1588
1589    #[test]
1590    fn parse_rejects_non_rfc3339_timestamp() {
1591        // A date-only value is not a full RFC3339 timestamp; created/updated
1592        // require the full form.
1593        let yaml = "type: email\ncreated: 2026-05-27";
1594        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1595        match err {
1596            ParseError::BadTimestamp { key, value, .. } => {
1597                assert_eq!(key, "created");
1598                assert_eq!(value, "2026-05-27");
1599            }
1600            other => panic!("expected BadTimestamp, got {other:?}"),
1601        }
1602    }
1603
1604    #[test]
1605    fn parse_malformed_yaml_errors() {
1606        // Unclosed flow mapping is invalid YAML.
1607        let yaml = "type: contact\n  bad: : :\n- nope";
1608        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1609        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1610    }
1611
1612    #[test]
1613    fn parse_empty_block_is_empty_frontmatter() {
1614        let fm = Frontmatter::parse("", Path::new("f.md")).unwrap();
1615        assert_eq!(fm, Frontmatter::default());
1616    }
1617
1618    #[test]
1619    fn parse_scalar_top_level_is_malformed() {
1620        // A bare scalar at the top level is not a frontmatter mapping.
1621        let err = Frontmatter::parse("just a string", Path::new("f.md")).unwrap_err();
1622        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1623    }
1624
1625    // ── to_yaml canonical order ──────────────────────────────────────────────
1626
1627    #[test]
1628    fn to_yaml_emits_canonical_key_order() {
1629        let mut fm = Frontmatter {
1630            type_: Some("contact".into()),
1631            id: Some("sarah-chen".into()),
1632            summary: Some("Director of Ops".into()),
1633            status: Some("active".into()),
1634            tags: vec!["vip".into()],
1635            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
1636            updated: Some(DateTime::parse_from_rfc3339("2026-05-28T09:30:00-07:00").unwrap()),
1637            ..Default::default()
1638        };
1639        // Two type-specific fields, inserted in NON-alphabetical order to prove
1640        // the writer sorts them (BTreeMap) between the universal head and tail.
1641        fm.extra
1642            .insert("role".into(), Value::String("Director".into()));
1643        fm.extra.insert(
1644            "company".into(),
1645            Value::String("[[records/companies/northstar]]".into()),
1646        );
1647
1648        let yaml = fm.to_yaml();
1649        let keys: Vec<&str> = yaml
1650            .lines()
1651            .filter(|l| !l.starts_with(['-', ' ']) && l.contains(':'))
1652            .map(|l| l.split(':').next().unwrap())
1653            .collect();
1654        assert_eq!(
1655            keys,
1656            vec![
1657                "type", "id", "created", "updated", "summary", // universal head
1658                "company", "role",   // type-specific, sorted
1659                "status", // universal tail
1660                "tags",
1661            ],
1662            "canonical order violated; got:\n{yaml}"
1663        );
1664        // Timestamps round-trip as RFC3339 strings (YAML may quote them).
1665        assert!(
1666            yaml.contains("2026-05-27T08:00:00-07:00"),
1667            "created timestamp missing; got:\n{yaml}"
1668        );
1669        // The value re-parses to the same instant regardless of quoting.
1670        let reparsed = Frontmatter::parse(&yaml, Path::new("rt.md")).unwrap();
1671        assert_eq!(reparsed.created, fm.created);
1672        assert_eq!(reparsed.updated, fm.updated);
1673    }
1674
1675    #[test]
1676    fn to_yaml_omits_absent_optional_fields() {
1677        let fm = Frontmatter {
1678            type_: Some("note".into()),
1679            ..Default::default()
1680        };
1681        let yaml = fm.to_yaml();
1682        assert!(yaml.contains("type: note"));
1683        assert!(!yaml.contains("status"));
1684        assert!(!yaml.contains("tags"));
1685        assert!(!yaml.contains("summary"));
1686    }
1687
1688    #[test]
1689    fn to_yaml_preserves_unquoted_scalar_wiki_link_round_trip() {
1690        // Regression (PRIMARY): the SPEC-canonical scalar wiki-link is the
1691        // *unquoted* inline `company: [[records/companies/northstar]]`
1692        // (SPEC § Linking, the worked `contact` example). YAML parses it to the
1693        // nested `Seq[Seq[String]]` shape and `parse` stores that verbatim in
1694        // `extra`. Before the fix, `to_yaml` re-emitted it block-style as
1695        //     company:
1696        //     - - records/companies/northstar
1697        // — the `[[ ]]` brackets GONE — so a no-op re-emit (`dbmd format`, and
1698        // any `fm set` / `link` write) silently destroyed the link.
1699        let yaml = "type: contact\ncompany: [[records/companies/northstar]]";
1700        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
1701        // Sanity: it really parsed as the nested sequence, not a string.
1702        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
1703
1704        let out = fm.to_yaml();
1705        // The link must survive as a quoted inline scalar — brackets intact, and
1706        // never the bracket-less block sequence `- - records/...`.
1707        assert!(
1708            out.contains("[[records/companies/northstar]]"),
1709            "canonical writer dropped the wiki-link brackets; got:\n{out}"
1710        );
1711        assert!(
1712            !out.contains("- - "),
1713            "canonical writer emitted a nested block sequence (link corrupted); got:\n{out}"
1714        );
1715
1716        // And it round-trips: re-parsing the emitted YAML still surfaces exactly
1717        // one link with the right target (the edge graph/backlinks rely on).
1718        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
1719        let fields = reparsed.link_fields();
1720        let links: Vec<(&str, &str, Option<&str>)> = fields
1721            .iter()
1722            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
1723            .collect();
1724        assert_eq!(
1725            links,
1726            vec![("company", "records/companies/northstar", None)]
1727        );
1728
1729        // A second re-emit is a fixed point — no progressive corruption across
1730        // repeated curator-loop writes.
1731        assert_eq!(
1732            reparsed.to_yaml(),
1733            out,
1734            "to_yaml is not idempotent on links"
1735        );
1736    }
1737
1738    #[test]
1739    fn to_yaml_preserves_unquoted_scalar_link_with_display() {
1740        // The `|display` segment must survive the unquoted-inline round-trip too.
1741        let yaml = "type: contact\ncompany: [[records/companies/northstar|Northstar]]";
1742        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
1743        let out = fm.to_yaml();
1744        assert!(
1745            out.contains("[[records/companies/northstar|Northstar]]"),
1746            "display segment lost on round-trip; got:\n{out}"
1747        );
1748        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
1749        let f = reparsed.link_fields();
1750        assert_eq!(f.len(), 1);
1751        assert_eq!(f[0].1.target, "records/companies/northstar");
1752        assert_eq!(f[0].1.display.as_deref(), Some("Northstar"));
1753    }
1754
1755    #[test]
1756    fn to_yaml_does_not_mangle_link_list_or_plain_nested_sequence() {
1757        // A genuine quoted block list of links round-trips as a clean string
1758        // list — never collapsed to a scalar — and a plain nested sequence that
1759        // is NOT a wiki-link is left exactly as written (no false conversion).
1760        let yaml = "type: meeting\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nmatrix:\n  - - 1\n    - 2";
1761        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
1762        let out = fm.to_yaml();
1763
1764        // Both attendee links survive as quoted strings.
1765        assert!(out.contains("[[records/contacts/elena]]"), "got:\n{out}");
1766        assert!(out.contains("[[records/contacts/sarah]]"), "got:\n{out}");
1767
1768        let reparsed = Frontmatter::parse(&out, Path::new("m.md")).unwrap();
1769        let fields = reparsed.link_fields();
1770        let attendees: Vec<&str> = fields
1771            .iter()
1772            .filter(|(k, _)| k == "attendees")
1773            .map(|(_, l)| l.target.as_str())
1774            .collect();
1775        assert_eq!(
1776            attendees,
1777            vec!["records/contacts/elena", "records/contacts/sarah"]
1778        );
1779        // The non-link nested sequence is preserved verbatim, not touched.
1780        assert_eq!(reparsed.extra.get("matrix"), fm.extra.get("matrix"));
1781    }
1782
1783    // ── read_file / write_file round-trip ────────────────────────────────────
1784
1785    #[test]
1786    fn write_then_read_roundtrips_and_preserves_body_verbatim() {
1787        let dir = tempdir().unwrap();
1788        let path = dir.path().join("sources/emails/x.md");
1789        let body = "# Subject\n\nHello,\n\nSee [[records/contacts/sarah-chen]].\n";
1790        let mut fm = Frontmatter {
1791            type_: Some("email".into()),
1792            summary: Some("renewal note".into()),
1793            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
1794            ..Default::default()
1795        };
1796        fm.extra
1797            .insert("from".into(), Value::String("elena@northstar.io".into()));
1798
1799        write_file(&path, &fm, body).unwrap();
1800
1801        let (read_fm, read_body) = read_file(&path).unwrap();
1802        assert_eq!(read_body, body, "body must be preserved byte-for-byte");
1803        assert_eq!(read_fm.type_.as_deref(), Some("email"));
1804        assert_eq!(read_fm.summary.as_deref(), Some("renewal note"));
1805        assert_eq!(
1806            read_fm.extra.get("from").and_then(|v| v.as_str()),
1807            Some("elena@northstar.io")
1808        );
1809        // The on-disk file starts with a fence and ends with the verbatim body.
1810        let raw = std::fs::read_to_string(&path).unwrap();
1811        assert!(raw.starts_with("---\n"));
1812        assert!(raw.ends_with(body));
1813    }
1814
1815    #[test]
1816    fn roundtrip_modify_summary_then_write_changes_only_summary() {
1817        let dir = tempdir().unwrap();
1818        let path = dir.path().join("records/contacts/sarah.md");
1819        let body = "Long-form operator notes about Sarah.\n";
1820        let fm = Frontmatter {
1821            type_: Some("contact".into()),
1822            summary: Some("old summary".into()),
1823            ..Default::default()
1824        };
1825        write_file(&path, &fm, body).unwrap();
1826
1827        // Read → modify summary → write back.
1828        let (mut fm2, body2) = read_file(&path).unwrap();
1829        fm2.summary = Some("new summary".into());
1830        write_file(&path, &fm2, &body2).unwrap();
1831
1832        let (fm3, body3) = read_file(&path).unwrap();
1833        assert_eq!(fm3.summary.as_deref(), Some("new summary"));
1834        assert_eq!(fm3.type_.as_deref(), Some("contact"));
1835        assert_eq!(body3, body, "body unchanged across the round-trip");
1836    }
1837
1838    #[test]
1839    fn roundtrip_preserves_handwritten_unquoted_scalar_wiki_link_on_disk() {
1840        // End-to-end analog of `dbmd format` on the verbatim SPEC worked example:
1841        // a hand-written file carrying the canonical UNQUOTED scalar link
1842        // `company: [[records/companies/northstar]]`, read from disk then written
1843        // back unchanged. Before the fix this no-op re-emit rewrote the on-disk
1844        // value to the bracket-less block sequence `company:\n- - records/...`,
1845        // and every reader (validate/graph/backlinks) then lost the edge.
1846        let dir = tempdir().unwrap();
1847        let path = dir.path().join("records/contacts/sarah-chen.md");
1848        let file = "---\ntype: contact\nid: sarah-chen\nsummary: Director of Ops\ncompany: [[records/companies/northstar]]\n---\n# Sarah Chen\n\nNotes.\n";
1849        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
1850        std::fs::write(&path, file).unwrap();
1851
1852        // Read → write back unchanged (the canonical no-op re-emit).
1853        let (fm, body) = read_file(&path).unwrap();
1854        write_file(&path, &fm, &body).unwrap();
1855
1856        // On-disk bytes still carry the bracketed link, never `- - records/...`.
1857        let raw = std::fs::read_to_string(&path).unwrap();
1858        assert!(
1859            raw.contains("[[records/companies/northstar]]"),
1860            "on-disk wiki-link brackets were destroyed; got:\n{raw}"
1861        );
1862        assert!(
1863            !raw.contains("- - "),
1864            "on-disk value became a nested block sequence; got:\n{raw}"
1865        );
1866
1867        // And the edge is still readable after the round-trip.
1868        let (fm2, _) = read_file(&path).unwrap();
1869        let fields = fm2.link_fields();
1870        let links: Vec<(&str, &str)> = fields
1871            .iter()
1872            .map(|(k, l)| (k.as_str(), l.target.as_str()))
1873            .collect();
1874        assert_eq!(links, vec![("company", "records/companies/northstar")]);
1875    }
1876
1877    #[test]
1878    fn write_file_does_not_leave_temp_files_behind() {
1879        let dir = tempdir().unwrap();
1880        let path = dir.path().join("records/x.md");
1881        let fm = Frontmatter {
1882            type_: Some("note".into()),
1883            ..Default::default()
1884        };
1885        write_file(&path, &fm, "body\n").unwrap();
1886        // The directory should contain only the target file, no `.x.md.tmp.*`.
1887        let entries: Vec<String> = std::fs::read_dir(path.parent().unwrap())
1888            .unwrap()
1889            .map(|e| e.unwrap().file_name().to_string_lossy().into_owned())
1890            .collect();
1891        assert_eq!(entries, vec!["x.md".to_string()]);
1892    }
1893
1894    // ── is_content_file ──────────────────────────────────────────────────────
1895
1896    #[test]
1897    fn is_content_file_recognizes_layers_and_excludes_meta() {
1898        assert!(Frontmatter::is_content_file(Path::new(
1899            "sources/emails/2026-05-22.md"
1900        )));
1901        assert!(Frontmatter::is_content_file(Path::new(
1902            "records/contacts/sarah-chen.md"
1903        )));
1904        assert!(Frontmatter::is_content_file(Path::new(
1905            "wiki/people/sarah-chen.md"
1906        )));
1907        // Absolute paths under a layer are still content.
1908        assert!(Frontmatter::is_content_file(Path::new(
1909            "/home/db/records/companies/northstar.md"
1910        )));
1911        // index.md at any level is meta.
1912        assert!(!Frontmatter::is_content_file(Path::new(
1913            "records/contacts/index.md"
1914        )));
1915        assert!(!Frontmatter::is_content_file(Path::new("index.md")));
1916        // Root meta files.
1917        assert!(!Frontmatter::is_content_file(Path::new("DB.md")));
1918        assert!(!Frontmatter::is_content_file(Path::new("log.md")));
1919    }
1920
1921    // ── effective_id ─────────────────────────────────────────────────────────
1922
1923    #[test]
1924    fn effective_id_prefers_explicit_then_derives_from_path() {
1925        let with_id = Frontmatter {
1926            id: Some("explicit-id".into()),
1927            ..Default::default()
1928        };
1929        assert_eq!(
1930            with_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
1931            "explicit-id"
1932        );
1933        let no_id = Frontmatter::default();
1934        assert_eq!(
1935            no_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
1936            "sarah-chen"
1937        );
1938    }
1939
1940    // ── get / set ────────────────────────────────────────────────────────────
1941
1942    #[test]
1943    fn set_routes_universal_and_custom_keys() {
1944        let mut fm = Frontmatter::default();
1945        fm.set("type", "contact").unwrap();
1946        fm.set("summary", "hi").unwrap();
1947        fm.set("company", "[[records/companies/northstar]]")
1948            .unwrap();
1949        assert_eq!(fm.type_.as_deref(), Some("contact"));
1950        assert_eq!(fm.summary.as_deref(), Some("hi"));
1951        // Custom key landed in extra, not a typed slot.
1952        assert_eq!(
1953            fm.extra.get("company").and_then(|v| v.as_str()),
1954            Some("[[records/companies/northstar]]")
1955        );
1956        // get reads from both typed fields and extra.
1957        assert_eq!(
1958            fm.get("type").and_then(|v| v.as_str().map(String::from)),
1959            Some("contact".into())
1960        );
1961        assert_eq!(
1962            fm.get("company").and_then(|v| v.as_str().map(String::from)),
1963            Some("[[records/companies/northstar]]".into())
1964        );
1965        assert!(fm.get("nonexistent").is_none());
1966    }
1967
1968    #[test]
1969    fn set_timestamp_validates_rfc3339() {
1970        let mut fm = Frontmatter::default();
1971        fm.set("created", "2026-05-27T08:00:00-07:00").unwrap();
1972        assert!(fm.created.is_some());
1973        let err = fm.set("updated", "not-a-date").unwrap_err();
1974        assert!(matches!(err, ParseError::BadTimestamp { .. }));
1975    }
1976
1977    // ── extract_wiki_links ───────────────────────────────────────────────────
1978
1979    #[test]
1980    fn extract_wiki_links_flags_full_path_short_form_and_extension() {
1981        let body = "See [[records/contacts/sarah-chen]] and [[sarah-chen]].\nAlso [[wiki/people/sarah-chen.md|Sarah]].\n";
1982        let links = extract_wiki_links(body, Path::new("doc.md"));
1983        assert_eq!(links.len(), 3);
1984
1985        // Full path, no extension, no display.
1986        assert_eq!(links[0].target, "records/contacts/sarah-chen");
1987        assert!(links[0].is_full_path);
1988        assert!(!links[0].has_md_extension);
1989        assert_eq!(links[0].display, None);
1990        assert_eq!(links[0].location.1, 1, "first link on line 1");
1991
1992        // Short form: not a full path.
1993        assert_eq!(links[1].target, "sarah-chen");
1994        assert!(!links[1].is_full_path, "bare target is short-form");
1995
1996        // Full path WITH .md extension and a display override on line 2.
1997        assert_eq!(links[2].target, "wiki/people/sarah-chen.md");
1998        assert!(links[2].is_full_path);
1999        assert!(links[2].has_md_extension);
2000        assert_eq!(links[2].display.as_deref(), Some("Sarah"));
2001        assert_eq!(links[2].location.1, 2);
2002    }
2003
2004    #[test]
2005    fn extract_wiki_links_reports_1_based_column_counting_chars() {
2006        // A multi-byte prefix (é is 2 bytes) must not skew the char column.
2007        let body = "café [[records/x/y]]";
2008        let links = extract_wiki_links(body, Path::new("d.md"));
2009        assert_eq!(links.len(), 1);
2010        // "café " is 5 chars, so the `[[` starts at char column 6 (1-based).
2011        assert_eq!(links[0].location.2, 6);
2012    }
2013
2014    #[test]
2015    fn extract_wiki_links_ignores_a_lone_path_without_brackets() {
2016        let links = extract_wiki_links(
2017            "records/contacts/sarah-chen is not a link",
2018            Path::new("d.md"),
2019        );
2020        assert!(links.is_empty());
2021    }
2022
2023    // ── extract_markdown_links ───────────────────────────────────────────────
2024
2025    #[test]
2026    fn extract_markdown_links_captures_external_and_not_wiki_links() {
2027        let body =
2028            "See [the thread](https://x.com/a) and [[records/contacts/sarah-chen]] internally.\n";
2029        let md = extract_markdown_links(body, Path::new("d.md"));
2030        assert_eq!(
2031            md.len(),
2032            1,
2033            "wiki-link must not be captured as a markdown link"
2034        );
2035        assert_eq!(md[0].text, "the thread");
2036        assert_eq!(md[0].url, "https://x.com/a");
2037        assert_eq!(md[0].location.1, 1);
2038
2039        // And the wiki-link extractor must not pick up the markdown link.
2040        let wl = extract_wiki_links(body, Path::new("d.md"));
2041        assert_eq!(wl.len(), 1);
2042        assert_eq!(wl[0].target, "records/contacts/sarah-chen");
2043    }
2044
2045    // ── link_fields ──────────────────────────────────────────────────────────
2046
2047    #[test]
2048    fn link_fields_extracts_scalar_list_and_summary_links() {
2049        // The canonical list form quotes each item so YAML parses it as clean
2050        // strings; a scalar field may be quoted OR written in the canonical
2051        // unquoted inline form `company: [[x]]` (SPEC § Linking).
2052        let yaml = "type: meeting\nsummary: with [[records/contacts/elena]]\ncompany: \"[[records/companies/northstar]]\"\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nnotes: just plain text";
2053        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2054        // Sanity: company really did parse as a scalar string here.
2055        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_some());
2056        let fields = fm.link_fields();
2057
2058        // company (scalar) once, with the right target.
2059        let company: Vec<&str> = fields
2060            .iter()
2061            .filter(|(k, _)| k == "company")
2062            .map(|(_, l)| l.target.as_str())
2063            .collect();
2064        assert_eq!(company, vec!["records/companies/northstar"]);
2065        // attendees (block list) twice.
2066        let attendees: Vec<&str> = fields
2067            .iter()
2068            .filter(|(k, _)| k == "attendees")
2069            .map(|(_, l)| l.target.as_str())
2070            .collect();
2071        assert_eq!(
2072            attendees,
2073            vec!["records/contacts/elena", "records/contacts/sarah"]
2074        );
2075        // summary link surfaced.
2076        assert_eq!(fields.iter().filter(|(k, _)| k == "summary").count(), 1);
2077        // Plain-text field is not a link.
2078        assert_eq!(fields.iter().filter(|(k, _)| k == "notes").count(), 0);
2079    }
2080
2081    #[test]
2082    fn link_fields_surfaces_canonical_unquoted_scalar_link() {
2083        // Regression: the canonical scalar wiki-link form is the *unquoted*
2084        // inline `company: [[records/companies/northstar]]` (SPEC § Linking).
2085        // YAML parses `[[x]]` as a flow-list-in-a-list (`Seq[Seq[String]]`), so
2086        // a naive `as_str()`-only walk drops it. link_fields() must still
2087        // surface exactly one link with the correct target.
2088        let yaml = "type: meeting\ncompany: [[records/companies/northstar]]";
2089        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2090        // Sanity: it really did parse as the nested sequence form, NOT a string.
2091        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
2092
2093        let fields = fm.link_fields();
2094        let links: Vec<(&str, &str, Option<&str>)> = fields
2095            .iter()
2096            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
2097            .collect();
2098        assert_eq!(
2099            links,
2100            vec![("company", "records/companies/northstar", None)]
2101        );
2102
2103        // The `|display` segment survives the unquoted inline form too.
2104        let fm2 = Frontmatter::parse(
2105            "type: meeting\ncompany: [[records/companies/northstar|Northstar]]",
2106            Path::new("m.md"),
2107        )
2108        .unwrap();
2109        let f2 = fm2.link_fields();
2110        assert_eq!(f2.len(), 1);
2111        assert_eq!(f2[0].0, "company");
2112        assert_eq!(f2[0].1.target, "records/companies/northstar");
2113        assert_eq!(f2[0].1.display.as_deref(), Some("Northstar"));
2114    }
2115
#[test]
fn link_fields_ignores_plain_one_item_flow_list() {
    // `aliases: [foo]` is an ordinary one-item flow list. It parses to
    // `Seq[String]`, a single nesting level short of the unquoted `[[foo]]`
    // form, so it must never be reported as a wiki-link.
    let fm = Frontmatter::parse("type: contact\naliases: [foo]", Path::new("c.md")).unwrap();
    let fields = fm.link_fields();
    assert!(fields.is_empty());
}
2125
2126    // ── detect_flow_form_link_lists ──────────────────────────────────────────
2127
#[test]
fn detect_flow_form_flags_list_misencodings_not_scalars() {
    let only_attendees = vec![String::from("attendees")];

    // A triple-nested flow-form list is the mis-encoding and gets flagged;
    // the double-nested scalar inline wiki-link beside it does not.
    let flow_form = "attendees: [[[records/x]], [[records/y]]]\nscalar_inline: [[records/z]]";
    assert_eq!(detect_flow_form_link_lists(flow_form), only_attendees);

    // A block list with UNquoted items parses triple-nested too, so it is
    // flagged the same way.
    let unquoted_block = "attendees:\n  - [[records/x]]\n  - [[records/y]]";
    assert_eq!(detect_flow_form_link_lists(unquoted_block), only_attendees);

    // Neither of these may be flagged:
    let clean_inputs = [
        // the canonical QUOTED block form, which parses to plain strings;
        "attendees:\n  - \"[[records/x]]\"\n  - \"[[records/y]]\"",
        // an everyday scalar list of strings.
        "tags: [a, b, c]",
    ];
    for clean in clean_inputs {
        assert!(detect_flow_form_link_lists(clean).is_empty());
    }
}
2151
2152    // ── extract_sections ─────────────────────────────────────────────────────
2153
#[test]
fn extract_sections_levels_nesting_and_boundaries() {
    let secs = extract_sections("intro text\n## First\nalpha\n### Sub\nbeta\n## Second\ngamma\n");

    // Headings come back in document order together with their levels.
    let mut outline: Vec<(&str, u8)> = Vec::new();
    for sec in secs.iter() {
        outline.push((sec.heading.as_str(), sec.level));
    }
    assert_eq!(outline, vec![("First", 2), ("Sub", 3), ("Second", 2)]);

    let (first, sub, second) = (&secs[0], &secs[1], &secs[2]);

    // The H2 "First" swallows its H3 child and ends where "Second" begins.
    for expected in ["alpha", "### Sub", "beta"] {
        assert!(first.body.contains(expected));
    }
    assert!(!first.body.contains("Second"));

    // The H3 "Sub" ends at the next heading of equal or shallower depth,
    // which is "Second".
    assert!(sub.body.contains("beta"));
    assert!(!sub.body.contains("gamma"));

    // Line numbers are 1-based and relative to the body.
    assert_eq!(first.line, 2);
    assert_eq!(second.line, 6);
}
2178
#[test]
fn extract_sections_ignores_headings_in_fenced_code() {
    let secs = extract_sections("## Real\n```\n## Fake heading in code\n```\nafter\n");

    // Only the heading outside the fence opens a section.
    assert_eq!(secs.len(), 1);
    let only = &secs[0];
    assert_eq!(only.heading, "Real");

    // The fenced "## Fake" line stays inside Real's body.
    assert!(only.body.contains("## Fake heading in code"));
}
2188
2189    // ── parse_field_spec ─────────────────────────────────────────────────────
2190
#[test]
fn parse_field_spec_required_and_shape() {
    // Both modifiers are recognized, so nothing lands in the unknown bucket.
    let spec = parse_field_spec("- email (required, email)");
    assert_eq!(spec.name, "email");
    assert!(spec.required);
    let expected_shape = Some(Shape::Email);
    assert_eq!(spec.shape, expected_shape);
    assert!(spec.unknown_modifiers.is_empty());
}
2199
#[test]
fn parse_field_spec_link_prefix_strips_trailing_slash() {
    let spec = parse_field_spec("- company (required, link to records/companies/)");
    assert!(spec.required);

    // The prefix is stored without the trailing `/` from the bullet.
    let expected_prefix = PathBuf::from("records/companies");
    assert_eq!(spec.link_prefix.as_ref(), Some(&expected_prefix));

    // A `link to` modifier does not set a shape.
    assert!(spec.shape.is_none());
}
2207
#[test]
fn parse_field_spec_default_preserves_case_and_value() {
    let spec = parse_field_spec("- currency (default USD)");
    assert_eq!(spec.name, "currency");

    // The default keeps its original upper-case spelling.
    let expected_default = Value::String(String::from("USD"));
    assert_eq!(spec.default.as_ref(), Some(&expected_default));
}
2214
#[test]
fn parse_field_spec_enum_captures_comma_list_as_last_modifier() {
    let spec = parse_field_spec("- status (required, enum: open, closed, pending)");
    assert!(spec.required);

    // Everything after `enum:` is the value list, commas included.
    let expected_values: Vec<String> = ["open", "closed", "pending"]
        .iter()
        .map(|v| v.to_string())
        .collect();
    assert_eq!(spec.enum_values, Some(expected_values));
}
2228
#[test]
fn parse_field_spec_unknown_modifier_is_captured_not_errored() {
    let spec = parse_field_spec("- weird (required, frobnicate, string)");

    // The unrecognized modifier is recorded rather than rejected...
    assert_eq!(spec.unknown_modifiers, vec![String::from("frobnicate")]);

    // ...and the recognized modifiers around it still take effect.
    assert!(spec.required);
    assert_eq!(spec.shape, Some(Shape::String));
}
2236
#[test]
fn parse_field_spec_no_parens_is_freeform_optional() {
    // A bare bullet carries a name and nothing else.
    let spec = parse_field_spec("- nickname");
    assert_eq!(spec.name, "nickname");
    assert!(!spec.required);
    assert!(spec.shape.is_none());
    assert!(spec.link_prefix.is_none());
    assert!(spec.enum_values.is_none());
    assert!(spec.unknown_modifiers.is_empty());
}
2247
2248    // ── parse_db_md ──────────────────────────────────────────────────────────
2249
/// Canonical `DB.md` fixture for the `parse_db_md` tests, written one source
/// line per literal: frontmatter, a title and summary, `## Agent
/// instructions`, `## Policies` (frozen pages, ignored types) and
/// `## Schemas` (`contact`, `expense`).
const CANONICAL_DB_MD: &str = concat!(
    "---\n",
    "type: db-md\n",
    "scope: company\n",
    "owner: Sarah Chen\n",
    "---\n",
    "\n",
    "# Acme operations knowledge base\n",
    "\n",
    "Company-scale institutional memory for Acme.\n",
    "\n",
    "## Agent instructions\n",
    "\n",
    "Prioritize creating `contact` records from new-sender emails. Use British English.\n",
    "\n",
    "## Policies\n",
    "\n",
    "### Frozen pages\n",
    "- `records/decisions/2026-q1-strategy.md` — finalized, do not modify.\n",
    "- `wiki/synthesis/2026-annual-plan.md` — signed-off plan.\n",
    "\n",
    "### Ignored types\n",
    "- `test`, `temp` — read but never synthesize.\n",
    "\n",
    "## Schemas\n",
    "\n",
    "### contact\n",
    "- name (required)\n",
    "- email (required, email)\n",
    "- company (required, link to records/companies/)\n",
    "- role (string)\n",
    "\n",
    "### expense\n",
    "- date (required, date)\n",
    "- amount (required)\n",
    "- currency (default USD)\n",
);
2251
#[test]
fn parse_db_md_extracts_all_canonical_sections() {
    let config = parse_db_md(CANONICAL_DB_MD, Path::new("DB.md")).unwrap();

    // Agent instructions are free-form prose with the heading line removed.
    let instructions = config
        .agent_instructions
        .as_ref()
        .expect("agent instructions present");
    assert!(instructions.starts_with("Prioritize creating"));
    assert!(!instructions.contains("## Agent instructions"));

    // Frozen pages: the backticked path of each bullet, trailing comment
    // discarded.
    let expected_frozen: Vec<PathBuf> = [
        "records/decisions/2026-q1-strategy.md",
        "wiki/synthesis/2026-annual-plan.md",
    ]
    .iter()
    .map(|p| PathBuf::from(*p))
    .collect();
    assert_eq!(config.frozen_pages, expected_frozen);

    // Ignored types: a comma list with backticks and comment removed.
    let expected_ignored = vec![String::from("test"), String::from("temp")];
    assert_eq!(config.ignored_types, expected_ignored);

    // Schemas: two types, fields kept in source order.
    assert_eq!(config.schemas.len(), 2);

    let contact_fields = &config.schemas.get("contact").expect("contact schema").fields;
    let mut names: Vec<&str> = Vec::new();
    for field in contact_fields.iter() {
        names.push(field.name.as_str());
    }
    assert_eq!(names, vec!["name", "email", "company", "role"]);
    // name
    assert!(contact_fields[0].required);
    // email
    assert_eq!(contact_fields[1].shape, Some(Shape::Email));
    // company
    let expected_prefix = PathBuf::from("records/companies");
    assert_eq!(contact_fields[2].link_prefix.as_ref(), Some(&expected_prefix));

    let expense = config.schemas.get("expense").expect("expense schema");
    let currency = expense
        .fields
        .iter()
        .find(|field| field.name == "currency")
        .unwrap();
    let expected_default = Value::String(String::from("USD"));
    assert_eq!(currency.default.as_ref(), Some(&expected_default));
}
2298
#[test]
fn parse_db_md_handles_malformed_and_unknown_modifiers() {
    // corpus-b shape. The `## Schemas` section holds a bullet that precedes
    // any `### <type>` heading (it belongs to no schema and is dropped), a
    // bullet with an unknown modifier, and a malformed bullet.
    let text = "---\ntype: db-md\n---\n\n## Schemas\n- orphan (required)\n\n### ticket\n- priority (required, mystery, enum: low, high)\n- broken (\n";
    let config = parse_db_md(text, Path::new("DB.md")).unwrap();

    // The heading-less orphan bullet does not produce a schema of its own.
    assert_eq!(config.schemas.len(), 1);
    let ticket_fields = &config.schemas.get("ticket").expect("ticket schema").fields;
    assert_eq!(ticket_fields.len(), 2);
    let (priority, broken) = (&ticket_fields[0], &ticket_fields[1]);

    assert!(priority.required);
    assert_eq!(priority.unknown_modifiers, vec![String::from("mystery")]);
    let expected_values = vec![String::from("low"), String::from("high")];
    assert_eq!(priority.enum_values, Some(expected_values));

    // An unclosed paren still leaves a usable field name behind.
    assert_eq!(broken.name, "broken");
}
2325
#[test]
fn parse_db_md_missing_frontmatter_errors() {
    // No `---` block at the top: parsing must fail with the dedicated
    // variant rather than succeed or report some other error.
    let outcome = parse_db_md(
        "# No frontmatter\n\n## Agent instructions\nhi\n",
        Path::new("DB.md"),
    );
    assert!(matches!(
        outcome,
        Err(ParseError::MissingFrontmatter { .. })
    ));
}
2332
#[test]
fn parse_db_md_absent_sections_default_empty() {
    // Frontmatter plus a title only: the parsed config must equal the
    // default one.
    let parsed = parse_db_md(
        "---\ntype: db-md\n---\n\n# Title only\n",
        Path::new("DB.md"),
    )
    .unwrap();
    let expected = Config::default();
    assert_eq!(parsed, expected);
}
2339
2340    // ── fm set / --fm list-valued link fields (meeting.attendees & friends) ──
2341
/// Every write surface (`fm set`, `write --fm`) routes its value through
/// `Frontmatter::set`. When that value is a list of wiki-links (the SPEC's
/// `meeting.attendees` shape) it has to be stored and emitted as a YAML
/// **block sequence** of quoted links, so that [`links_in_field_value`] can
/// read it back and `dbmd validate` accepts it. It must never end up as the
/// flow-form scalar string that triggers `WIKI_LINK_FLOW_FORM_LIST`. Agents
/// type the list either unquoted (`[[[a]], [[b]]]`) or quoted
/// (`["[[a]]", "[[b]]"]`); both spellings must normalize to the same result.
#[test]
fn set_list_of_wiki_links_becomes_block_sequence_both_spellings() {
    let spellings = [
        "[[[records/contacts/a]], [[records/contacts/b]]]",
        r#"["[[records/contacts/a]]", "[[records/contacts/b]]"]"#,
    ];
    let wiki = |target: &str| Value::String(target.into());

    for value in spellings {
        let mut fm = Frontmatter::default();
        fm.set("attendees", value).unwrap();

        // The stored value is a two-element sequence of clean quoted links.
        let stored = fm.extra.get("attendees").expect("attendees set");
        let items = match stored {
            Value::Sequence(items) => items,
            _ => panic!("attendees must be a Sequence, got {stored:?} for input {value}"),
        };
        assert_eq!(items.len(), 2, "input {value}");
        assert_eq!(items[0], wiki("[[records/contacts/a]]"));
        assert_eq!(items[1], wiki("[[records/contacts/b]]"));

        // Reading the edges back yields exactly those two targets; stray
        // bracket targets would be the flow-form-string symptom.
        let mut links = Vec::new();
        for link in links_in_field_value(stored) {
            links.push(link.target);
        }
        assert_eq!(
            links,
            vec!["records/contacts/a", "records/contacts/b"],
            "input {value}"
        );

        // The canonical writer emits a block list, never a scalar string.
        let yaml = fm.to_yaml();
        let has_block_key = yaml.contains("attendees:\n");
        assert!(has_block_key, "expected block list in:\n{yaml}");
        let is_scalar_string = yaml.contains("attendees: '[[");
        assert!(
            !is_scalar_string,
            "must not be a flow-form scalar string in:\n{yaml}"
        );
    }
}
2391
/// One inline wiki-link on its own remains a scalar string (it renders
/// inline as `field: [[x]]`). It must not be promoted to a one-item list,
/// which keeps the common `contact.company` / `expense.vendor` shape intact.
#[test]
fn set_single_inline_wiki_link_stays_scalar() {
    let mut fm = Frontmatter::default();
    fm.set("company", "[[records/companies/tideform]]").unwrap();

    let expected = Value::String("[[records/companies/tideform]]".into());
    let stored = fm.extra.get("company");
    assert_eq!(stored, Some(&expected));

    // The scalar is still read back as exactly one link.
    let mut targets = Vec::new();
    for link in links_in_field_value(stored.unwrap()) {
        targets.push(link.target);
    }
    assert_eq!(targets, vec!["records/companies/tideform"]);
}
2410
/// Values that are not lists of clean wiki-links are stored verbatim as
/// scalar strings: list normalization applies only when every item is a
/// wiki-link, so plain text and a non-link flow list pass through unchanged.
#[test]
fn set_non_link_values_stay_scalar_strings() {
    let cases = [
        // Ordinary prose.
        ("location", "Video call (remote)"),
        // A flow list whose items are NOT wiki-links: it must not be
        // reinterpreted as a link sequence and stays the string as passed.
        ("note", "[draft, wip]"),
    ];

    let mut fm = Frontmatter::default();
    for (key, text) in cases {
        fm.set(key, text).unwrap();
        let expected = Value::String(text.into());
        assert_eq!(fm.extra.get(key), Some(&expected));
    }
}
2430}