Skip to main content

dbmd_core/
parser.rs

1//! `parser` — read and write db.md markdown files.
2//!
3//! Parses the YAML frontmatter block, the markdown body, wiki-links, standard
4//! markdown links, `##` sections, and the structured sections of the `DB.md`
5//! config file. Also the atomic writer that round-trips a file while
6//! preserving the operator-edited body verbatim and emitting frontmatter in
7//! canonical key order.
8//!
9//! Strict on required fields, lenient on unknowns: any frontmatter key the
10//! spec doesn't recognize is preserved in [`Frontmatter::extra`] as ambient
11//! context and round-tripped untouched.
12
13use std::collections::BTreeMap;
14use std::path::{Path, PathBuf};
15
16use chrono::{DateTime, FixedOffset};
17use serde_norway::{Mapping, Value};
18
/// The three canonical layer folder names. A path is "content" / a wiki-link is
/// "full-path" only when it resolves under one of these.
///
/// Matching is exact and case-sensitive against a single path component
/// (see [`Frontmatter::is_content_file`]).
const LAYER_DIRS: [&str; 3] = ["sources", "records", "wiki"];
22
/// Errors produced while parsing a markdown file or the `DB.md` config.
///
/// Every variant except [`ParseError::Io`] carries the path of the file
/// involved, so the rendered message is self-locating.
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
    /// The frontmatter block was not valid YAML. Maps to validate code
    /// `FM_MALFORMED_YAML`.
    #[error("malformed YAML frontmatter in {file}: {source}")]
    MalformedYaml {
        /// The file whose frontmatter failed to parse.
        file: PathBuf,
        /// The underlying YAML error (also interpolated into the message).
        source: serde_norway::Error,
    },

    /// The file has no `---`-delimited frontmatter block at its very start.
    #[error("missing frontmatter block in {file}")]
    MissingFrontmatter {
        /// The offending file.
        file: PathBuf,
    },

    /// A required field was absent. Maps to validate code `FM_MISSING_TYPE`
    /// (for `type`) and the per-type required-field codes.
    #[error("missing required field '{key}' in {file}")]
    MissingField {
        /// The file missing the field.
        file: PathBuf,
        /// The required key.
        key: String,
    },

    /// A timestamp field was not ISO-8601 / RFC3339. Maps to `FM_BAD_TIMESTAMP`.
    #[error("bad timestamp in field '{key}' of {file}: {value}")]
    BadTimestamp {
        /// The file.
        file: PathBuf,
        /// The frontmatter key.
        key: String,
        /// The unparseable value.
        value: String,
    },

    /// An I/O error reading or writing a file. Built automatically from a
    /// `std::io::Error` by `?` (via `#[from]`) and displayed as the underlying
    /// error's own message (`transparent`).
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
68
/// The parsed YAML frontmatter of a db.md file.
///
/// The universal-contract fields are typed accessors; everything else lands in
/// [`extra`](Frontmatter::extra) as ambient context (unknown-field passthrough)
/// and is round-tripped verbatim. The atomic writer re-emits keys in canonical
/// order: `type`, `id`, `created`, `updated`, `summary` first, then
/// type-specific fields, then `status` / `tags`.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Frontmatter {
    /// `type` — required on content files; the primary query key.
    pub type_: Option<String>,
    /// `id` — optional; derived from the file path when absent.
    pub id: Option<String>,
    /// `created` — RFC3339; required and auto-set on content-file create.
    pub created: Option<DateTime<FixedOffset>>,
    /// `updated` — RFC3339; required and auto-maintained on content files.
    pub updated: Option<DateTime<FixedOffset>>,
    /// `summary` — the one-line catalog line; required on every content file.
    pub summary: Option<String>,
    /// `status` — optional lifecycle state.
    pub status: Option<String>,
    /// `tags` — optional flat list of short scalar labels. An empty vec means
    /// "no tags" and is omitted on output.
    pub tags: Vec<String>,
    /// All other frontmatter keys (type-specific + custom), with their values
    /// preserved as raw YAML. Being a `BTreeMap`, the keys are kept in sorted
    /// order, so the order they had in the source file is not retained.
    /// Wiki-link-valued fields keep their raw YAML form here;
    /// [`Frontmatter::link_fields`] surfaces them as [`WikiLink`]s.
    pub extra: BTreeMap<String, Value>,
}
98
99impl Frontmatter {
100    /// Parse a YAML frontmatter block (the text between the opening and closing
101    /// `---` fences, exclusive) into a [`Frontmatter`].
102    ///
103    /// Lenient on unknown keys (they go to [`extra`](Frontmatter::extra));
104    /// returns [`ParseError::MalformedYaml`] only on YAML that doesn't parse.
105    pub fn parse(yaml: &str, file: &Path) -> Result<Self, ParseError> {
106        // An empty (or whitespace-only) frontmatter block is a valid, empty
107        // mapping — not a YAML error.
108        let value: Value = if yaml.trim().is_empty() {
109            Value::Mapping(Mapping::new())
110        } else {
111            serde_norway::from_str(yaml).map_err(|source| ParseError::MalformedYaml {
112                file: file.to_path_buf(),
113                source,
114            })?
115        };
116
117        // Top-level frontmatter must be a mapping. A scalar or sequence at the
118        // top level is malformed for our purposes; surface it as such.
119        let map = match value {
120            Value::Mapping(m) => m,
121            Value::Null => Mapping::new(),
122            other => {
123                // serde_norway::Error has no public constructor, so manufacture a
124                // representative one by deserializing the (sequence/scalar)
125                // value into a Mapping, which always fails with a type error.
126                let source = serde_norway::from_value::<Mapping>(other)
127                    .expect_err("non-mapping frontmatter top level deserializes to Mapping");
128                return Err(ParseError::MalformedYaml {
129                    file: file.to_path_buf(),
130                    source,
131                });
132            }
133        };
134
135        let mut fm = Frontmatter::default();
136        for (k, v) in map {
137            let key = match k.as_str() {
138                Some(s) => s.to_string(),
139                // Non-string keys are unusual; stringify defensively and keep
140                // them in `extra` so nothing is silently dropped.
141                None => format!("{k:?}"),
142            };
143            match key.as_str() {
144                "type" => fm.type_ = v.as_str().map(str::to_string),
145                "id" => fm.id = v.as_str().map(str::to_string),
146                "created" => fm.created = parse_timestamp(&v, "created", file)?,
147                "updated" => fm.updated = parse_timestamp(&v, "updated", file)?,
148                "summary" => fm.summary = v.as_str().map(str::to_string),
149                "status" => fm.status = v.as_str().map(str::to_string),
150                "tags" => fm.tags = parse_tags(&v),
151                _ => {
152                    fm.extra.insert(key, v);
153                }
154            }
155        }
156        Ok(fm)
157    }
158
159    /// Serialize the frontmatter back to a YAML block (no `---` fences) in
160    /// canonical key order. Round-trips [`extra`](Frontmatter::extra) verbatim.
161    pub fn to_yaml(&self) -> String {
162        // Build an order-preserving mapping in canonical key order:
163        //   type, id, created, updated, summary  (universal head)
164        //   <type-specific extra, BTreeMap-sorted>
165        //   status, tags                          (universal tail)
166        // serde_norway::Mapping preserves insertion order, so one serialize call
167        // emits the block in exactly this order with correct YAML quoting.
168        let mut map = Mapping::new();
169
170        if let Some(t) = &self.type_ {
171            map.insert(Value::String("type".into()), Value::String(t.clone()));
172        }
173        if let Some(id) = &self.id {
174            map.insert(Value::String("id".into()), Value::String(id.clone()));
175        }
176        if let Some(created) = &self.created {
177            map.insert(
178                Value::String("created".into()),
179                Value::String(created.to_rfc3339()),
180            );
181        }
182        if let Some(updated) = &self.updated {
183            map.insert(
184                Value::String("updated".into()),
185                Value::String(updated.to_rfc3339()),
186            );
187        }
188        if let Some(summary) = &self.summary {
189            map.insert(
190                Value::String("summary".into()),
191                Value::String(summary.clone()),
192            );
193        }
194
195        // Type-specific + custom fields, in BTreeMap (sorted) order. Each value
196        // is canonicalized so a wiki-link round-trips to the form the writer and
197        // `dbmd validate` agree on — critically, the SPEC-canonical *unquoted*
198        // scalar `field: [[x]]` (which YAML parses to a nested `Seq[Seq[String]]`)
199        // is re-emitted as a quoted scalar `'[[x]]'` instead of the bracket-less
200        // block sequence `- - x` that a verbatim re-emit would produce and that
201        // destroys the link. See [`canonicalize_extra_value`].
202        for (k, v) in &self.extra {
203            map.insert(Value::String(k.clone()), canonicalize_extra_value(v));
204        }
205
206        if let Some(status) = &self.status {
207            map.insert(
208                Value::String("status".into()),
209                Value::String(status.clone()),
210            );
211        }
212        if !self.tags.is_empty() {
213            map.insert(
214                Value::String("tags".into()),
215                Value::Sequence(self.tags.iter().cloned().map(Value::String).collect()),
216            );
217        }
218
219        if map.is_empty() {
220            return String::new();
221        }
222        serde_norway::to_string(&Value::Mapping(map)).unwrap_or_default()
223    }
224
225    /// True if the file is content (under `sources/`, `records/`, or `wiki/`)
226    /// and not an `index.md`. Used by validate to decide which files require a
227    /// `summary`. Meta files (`DB.md`, `index.md`, `log.md`) return false.
228    pub fn is_content_file(path: &Path) -> bool {
229        // index.md is a meta file at every level, never content.
230        if path.file_name().and_then(|n| n.to_str()) == Some("index.md") {
231            return false;
232        }
233        // Content iff some path component is one of the three layer dirs. This
234        // works for both store-relative (`sources/emails/x.md`) and absolute
235        // (`/home/db/sources/emails/x.md`) paths. DB.md / log.md sit at the
236        // root, under no layer, so they fall through to false.
237        path.components().any(|c| {
238            c.as_os_str()
239                .to_str()
240                .is_some_and(|s| LAYER_DIRS.contains(&s))
241        })
242    }
243
244    /// Resolve the file's effective `id`: the explicit `id` field if present,
245    /// otherwise derived from the store-relative path (filename without `.md`).
246    pub fn effective_id(&self, store_relative_path: &Path) -> String {
247        if let Some(id) = &self.id {
248            if !id.is_empty() {
249                return id.clone();
250            }
251        }
252        // Derived id = filename without the `.md` extension.
253        store_relative_path
254            .file_stem()
255            .and_then(|s| s.to_str())
256            .unwrap_or_default()
257            .to_string()
258    }
259
260    /// Read a single frontmatter key as a raw YAML [`Value`], looking in the
261    /// typed fields first and then [`extra`](Frontmatter::extra).
262    pub fn get(&self, key: &str) -> Option<Value> {
263        match key {
264            "type" => self.type_.clone().map(Value::String),
265            "id" => self.id.clone().map(Value::String),
266            "created" => self.created.map(|d| Value::String(d.to_rfc3339())),
267            "updated" => self.updated.map(|d| Value::String(d.to_rfc3339())),
268            "summary" => self.summary.clone().map(Value::String),
269            "status" => self.status.clone().map(Value::String),
270            "tags" => {
271                if self.tags.is_empty() {
272                    None
273                } else {
274                    Some(Value::Sequence(
275                        self.tags.iter().cloned().map(Value::String).collect(),
276                    ))
277                }
278            }
279            _ => self.extra.get(key).cloned(),
280        }
281    }
282
283    /// Set a single frontmatter key from a string value, routing universal-
284    /// contract keys to their typed fields and everything else to
285    /// [`extra`](Frontmatter::extra). Used by `dbmd fm set`.
286    pub fn set(&mut self, key: &str, value: &str) -> Result<(), ParseError> {
287        match key {
288            "type" => self.type_ = Some(value.to_string()),
289            "id" => self.id = Some(value.to_string()),
290            "created" => {
291                self.created = Some(parse_rfc3339(value, "created", Path::new("<fm set>"))?)
292            }
293            "updated" => {
294                self.updated = Some(parse_rfc3339(value, "updated", Path::new("<fm set>"))?)
295            }
296            "summary" => self.summary = Some(value.to_string()),
297            "status" => self.status = Some(value.to_string()),
298            "tags" => {
299                // Accept either a YAML flow list (`[a, b]`) or a single scalar
300                // tag. Anything that parses to a sequence becomes the tag list;
301                // otherwise the whole string is one tag.
302                self.tags = match serde_norway::from_str::<Value>(value) {
303                    Ok(Value::Sequence(seq)) => parse_tags(&Value::Sequence(seq)),
304                    _ => vec![value.to_string()],
305                };
306            }
307            _ => {
308                // A custom / type-specific field. The value is a scalar string by
309                // default, but the spec's list-valued link fields (e.g.
310                // `meeting.attendees`, SPEC § Linking) must serialize as a YAML
311                // block sequence of quoted wiki-links — never the flow-form string
312                // `"[[[a]], [[b]]]"`, which `dbmd validate` rejects as
313                // `WIKI_LINK_FLOW_FORM_LIST`. When the value parses as a YAML
314                // sequence whose every item is a clean single wiki-link, store the
315                // canonical sequence so `to_yaml` emits block form. Everything else
316                // — plain text, and a single inline `[[x]]` (which YAML reads as a
317                // nested `Seq[Seq[String]]`, not a list of link strings) — stays a
318                // verbatim scalar string, preserving the prior behavior.
319                let stored = parse_link_list_value(value)
320                    .unwrap_or_else(|| Value::String(value.to_string()));
321                self.extra.insert(key.to_string(), stored);
322            }
323        }
324        Ok(())
325    }
326
327    /// Extract every frontmatter field whose value is a wiki-link (scalar
328    /// inline form or a block-sequence list), pairing each with its key. The
329    /// validate engine checks these against `(link)` schema annotations.
330    pub fn link_fields(&self) -> Vec<(String, WikiLink)> {
331        let mut out = Vec::new();
332        // `summary` may carry navigational wiki-links (spec encourages it).
333        if let Some(summary) = &self.summary {
334            for link in extract_wiki_links(summary, Path::new("")) {
335                out.push(("summary".to_string(), link));
336            }
337        }
338        // Every type-specific / custom field: a scalar wiki-link or a list of
339        // wiki-links, in either the quoted (`"[[x]]"`) or the canonical unquoted
340        // (`[[x]]`) form. See [`links_in_field_value`] for the YAML shapes.
341        for (key, value) in &self.extra {
342            for link in links_in_field_value(value) {
343                out.push((key.clone(), link));
344            }
345        }
346        out
347    }
348}
349
/// A wiki-link reference inside the store: `[[target]]` or `[[target|display]]`.
///
/// `target` is always recorded as written; [`is_full_path`](WikiLink::is_full_path)
/// flags whether it's a full store-relative path (the doctrine) versus a
/// short-form (a validation error).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiLink {
    /// The link target as written, without the `[[ ]]` and without `|display`.
    pub target: String,
    /// The optional `|display` text override. `Some("")` when the `|` is
    /// present but nothing follows it.
    pub display: Option<String>,
    /// True when `target` is a full store-relative path (contains a `/` and
    /// resolves under a known layer); false for short-form targets like
    /// `sarah-chen` — which validate reports as `WIKI_LINK_SHORT_FORM`.
    pub is_full_path: bool,
    /// True when `target` carries a trailing `.md` extension — validate warns
    /// `WIKI_LINK_HAS_EXTENSION`; the canonical writers emit the bare form.
    pub has_md_extension: bool,
    /// Where the link appears: `(file, line, col)`, 1-based line and column.
    /// The line is counted within the text that was scanned.
    pub location: (PathBuf, u32, u32),
}
371
/// A standard markdown link `[text](url)` — an external reference, kept in a
/// stream separate from [`WikiLink`] so external targets are visible to the
/// toolkit without being conflated with in-store edges. Not graph-validated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownLink {
    /// The link text inside `[ ]` (may be empty).
    pub text: String,
    /// The URL or path inside `( )`, recorded as written (may be empty).
    pub url: String,
    /// Where the link appears: `(file, line, col)`, 1-based. The line is
    /// counted within the text that was scanned.
    pub location: (PathBuf, u32, u32),
}
384
/// A `##`/`###` section of a markdown body: the heading text plus an owned
/// copy of the part of the body it spans (heading line through the line before
/// the next heading of equal-or-shallower depth).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Section {
    /// The heading text (without the leading `#`s).
    pub heading: String,
    /// Heading depth (number of leading `#`s).
    pub level: u8,
    /// The 1-based line where the heading appears.
    pub line: u32,
    /// The section text, from the heading line to the next sibling-or-shallower
    /// heading (exclusive), copied out of the original body.
    pub body: String,
}
400
/// The parsed structured content of a store's `DB.md` config file.
///
/// All four parts are optional in the source; absent parts fall back to spec
/// defaults. Produced by [`parse_db_md`]. The derived `Default` is the fully
/// empty config: no instructions, no policies, no schemas.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Config {
    /// Body of the `## Agent instructions` section — free-form prose passed to
    /// the agent's system prompt.
    pub agent_instructions: Option<String>,
    /// `## Policies` → `### Frozen pages`: store-relative paths the toolkit
    /// refuses to write (`POLICY_FROZEN_PAGE`). Compare against these through
    /// [`Config::frozen_match`] rather than with raw path equality.
    pub frozen_pages: Vec<PathBuf>,
    /// `## Policies` → `### Ignored types`: type names the curator never
    /// synthesizes (still readable as ambient context).
    pub ignored_types: Vec<String>,
    /// `## Schemas` → one entry per `### <type>` sub-section, keyed by type
    /// name.
    pub schemas: BTreeMap<String, Schema>,
}
419
420impl Config {
421    /// The `### Frozen pages` entry that matches a store-relative `target`, if
422    /// any. The **single** frozen-page matcher every write surface must funnel
423    /// through so the policy is enforced identically on `write` / `fm set` /
424    /// `fm init` / `link` / `rename` / `format`.
425    ///
426    /// Comparison is normalized so a policy line and a write target match
427    /// regardless of incidental spelling differences:
428    /// - `/` path separators on every OS,
429    /// - a single leading `./` dropped,
430    /// - a trailing `.md` dropped on **both** sides — `parse_db_md` stores
431    ///   frozen entries verbatim, so an operator who writes the natural
432    ///   extensionless spelling (`records/decisions/q1`) must protect the file
433    ///   (`records/decisions/q1.md`) exactly as the `.md` spelling does.
434    ///
435    /// Returns the matched config entry verbatim (its original spelling) so the
436    /// caller can name it in the `POLICY_FROZEN_PAGE` refusal.
437    pub fn frozen_match(&self, target: &Path) -> Option<PathBuf> {
438        let want = normalize_frozen_path(target);
439        self.frozen_pages
440            .iter()
441            .find(|frozen| normalize_frozen_path(frozen) == want)
442            .cloned()
443    }
444
445    /// True if `target` (store-relative) is a frozen page. Convenience wrapper
446    /// over [`Config::frozen_match`] for callers that only need presence.
447    pub fn is_frozen(&self, target: &Path) -> bool {
448        self.frozen_match(target).is_some()
449    }
450}
451
/// Reduce a path to the canonical string used for frozen-page comparison.
///
/// The path's components are joined with `/`, then one leading `./` and one
/// trailing `.md` are removed. Components that are not valid UTF-8 are left
/// out. Policy entries and write targets both pass through here before being
/// compared, which makes the match separator-, `./`-, and `.md`-insensitive.
fn normalize_frozen_path(p: &Path) -> String {
    let mut joined = String::new();
    let parts = p.components().filter_map(|c| c.as_os_str().to_str());
    for (idx, part) in parts.enumerate() {
        if idx > 0 {
            joined.push('/');
        }
        joined.push_str(part);
    }

    let mut rest = joined.as_str();
    if let Some(stripped) = rest.strip_prefix("./") {
        rest = stripped;
    }
    if let Some(stripped) = rest.strip_suffix(".md") {
        rest = stripped;
    }
    rest.to_owned()
}
465
/// A user-declared type schema parsed from a `DB.md` `### <type>` sub-section.
/// The store's `## Schemas` is the **only** source of schema enforcement — the
/// toolkit ships no built-in or implicit per-type schema (see SPEC § Schemas).
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Schema {
    /// One [`FieldSpec`] per bulleted field line, in source order.
    pub fields: Vec<FieldSpec>,
    /// `- unique: <field>[, <field> …]` directives — each inner vec is one
    /// uniqueness constraint over the listed field(s) (compound when >1). Two
    /// records of this type whose listed values collide warn as
    /// `DUP_UNIQUE_KEY`.
    pub unique_keys: Vec<Vec<String>>,
    /// `- summary_template: <template>` directive — the `{field}` interpolation
    /// pattern `dbmd fm init` / `dbmd write` use to compose a default `summary`
    /// for this type. `None` (no directive) falls back to the body's first
    /// paragraph.
    pub summary_template: Option<String>,
}
483
/// One field declaration inside a [`Schema`]: `- <name> (<modifiers>)`.
///
/// Modifiers are comma-separated inside the parens; this captures the
/// recognized ones as typed fields and stashes anything unrecognized in
/// [`unknown_modifiers`](FieldSpec::unknown_modifiers) (surfaced as `Info`).
/// The derived `Default` is a field with no modifiers at all.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct FieldSpec {
    /// The field name.
    pub name: String,
    /// `required` modifier present.
    pub required: bool,
    /// The shape modifier (`string`/`int`/`bool`/`date`/`email`/`currency`/
    /// `url`), if any.
    pub shape: Option<Shape>,
    /// `link to <prefix>/` — the store-relative prefix a wiki-link target must
    /// start with. The trailing slash is required in the source syntax.
    pub link_prefix: Option<PathBuf>,
    /// `default <value>` — the value written when the field is absent, held as
    /// raw YAML.
    pub default: Option<Value>,
    /// `enum: <v1>, <v2>, ...` — the allowed values (must be the last modifier
    /// on the line because of its own commas).
    pub enum_values: Option<Vec<String>>,
    /// Any modifiers not in the recognized vocabulary, preserved verbatim;
    /// validate surfaces these as `Info`, never errors.
    pub unknown_modifiers: Vec<String>,
}
510
/// A recognized shape modifier for a schema field. Validate enforces the
/// corresponding value shape (`SCHEMA_SHAPE_MISMATCH` on violation).
///
/// The variants correspond one-to-one to the modifier keywords `string`,
/// `int`, `bool`, `date`, `email`, `currency`, and `url`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Shape {
    /// Any scalar string (`string`).
    String,
    /// Integer (`int`).
    Int,
    /// Boolean (`bool`).
    Bool,
    /// RFC3339 / ISO-8601 date (`date`).
    Date,
    /// `<local>@<domain>` email address (`email`).
    Email,
    /// A currency amount (`currency`).
    Currency,
    /// A URL (`url`).
    Url,
}
530
/// The result of splitting a raw file into its frontmatter block and body.
///
/// `body` is the verbatim remainder after the closing `---` fence — the writer
/// preserves it byte-for-byte so operator edits are never reflowed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFile {
    /// The raw frontmatter YAML (between the fences, exclusive of them),
    /// including the line terminator of its last line. Empty when the two
    /// fences are adjacent.
    pub frontmatter_yaml: String,
    /// The verbatim body: everything after the closing `---` line and that
    /// line's terminator.
    pub body: String,
}
542
543/// Split a file's full text into its frontmatter block and body. The
544/// frontmatter block must be the very first thing in the file, delimited by
545/// `---` on its own line at start and end. Returns
546/// [`ParseError::MissingFrontmatter`] if absent.
547pub fn split_frontmatter(text: &str, file: &Path) -> Result<ParsedFile, ParseError> {
548    // The opening fence must be the very first line: `---` (optionally with a
549    // trailing CR), no leading whitespace, nothing before it.
550    let mut lines = text.split_inclusive('\n');
551    let first = lines.next().unwrap_or("");
552    if first.trim_end_matches(['\r', '\n']) != "---" {
553        return Err(ParseError::MissingFrontmatter {
554            file: file.to_path_buf(),
555        });
556    }
557
558    // Scan for the closing fence line. Track byte offsets so we can slice the
559    // YAML (between fences, exclusive) and the body (verbatim, after the
560    // closing fence's line terminator).
561    let opening_len = first.len();
562    let mut offset = opening_len;
563    for line in lines {
564        if line.trim_end_matches(['\r', '\n']) == "---" {
565            let yaml = &text[opening_len..offset];
566            let body_start = offset + line.len();
567            let body = &text[body_start..];
568            return Ok(ParsedFile {
569                frontmatter_yaml: yaml.to_string(),
570                body: body.to_string(),
571            });
572        }
573        offset += line.len();
574    }
575
576    // Opening fence present but no closing fence: malformed frontmatter block.
577    Err(ParseError::MissingFrontmatter {
578        file: file.to_path_buf(),
579    })
580}
581
582/// Read a file from disk and parse it into typed [`Frontmatter`] plus the
583/// verbatim body string.
584pub fn read_file(path: &Path) -> Result<(Frontmatter, String), ParseError> {
585    let text = std::fs::read_to_string(path)?;
586    let parsed = split_frontmatter(&text, path)?;
587    let fm = Frontmatter::parse(&parsed.frontmatter_yaml, path)?;
588    Ok((fm, parsed.body))
589}
590
591/// Atomically write a markdown file from frontmatter + body: emit the
592/// frontmatter in canonical key order, then the body verbatim, via a
593/// temp-file-rename so a reader never sees a half-written file. Preserves the
594/// operator-edited body exactly as given.
595pub fn write_file(path: &Path, frontmatter: &Frontmatter, body: &str) -> Result<(), ParseError> {
596    use std::io::Write;
597
598    let yaml = frontmatter.to_yaml();
599    // `to_yaml` already terminates each block with a newline. Compose the file
600    // as: opening fence, frontmatter YAML, closing fence, then body verbatim.
601    let mut contents = String::with_capacity(yaml.len() + body.len() + 8);
602    contents.push_str("---\n");
603    contents.push_str(&yaml);
604    contents.push_str("---\n");
605    contents.push_str(body);
606
607    // Atomic write: write to a sibling temp file in the same directory, then
608    // rename over the target. Same-dir rename is atomic on a single
609    // filesystem, so a concurrent reader never sees a half-written file.
610    let parent = path.parent().unwrap_or_else(|| Path::new("."));
611    std::fs::create_dir_all(parent)?;
612    let file_name = path
613        .file_name()
614        .and_then(|n| n.to_str())
615        .unwrap_or("dbmd-write");
616    let (mut f, tmp) = create_temp_file(parent, file_name)?;
617
618    // Scope the handle so it is flushed and closed before the rename.
619    {
620        f.write_all(contents.as_bytes())?;
621        f.sync_all()?;
622    }
623    // On failure, clean up the temp file rather than leaking it.
624    if let Err(e) = std::fs::rename(&tmp, path) {
625        let _ = std::fs::remove_file(&tmp);
626        return Err(ParseError::Io(e));
627    }
628    sync_parent_dir(parent);
629    Ok(())
630}
631
/// Create a fresh, exclusively-owned temp file inside `parent` and return the
/// open handle together with its path.
///
/// The file is named `.{file_name}.tmp.{pid}.{nanos}.{seq}`, where `seq` is a
/// process-wide counter. Creation uses `create_new`, so an existing file is
/// never reused; on a name collision the next sequence number is tried, up to
/// 128 attempts.
///
/// # Errors
///
/// Any I/O error from opening the file other than `AlreadyExists` is returned
/// immediately; `AlreadyExists` is returned once all attempts have collided.
fn create_temp_file(parent: &Path, file_name: &str) -> std::io::Result<(std::fs::File, PathBuf)> {
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    static TMP_SEQ: AtomicU64 = AtomicU64::new(0);
    const MAX_ATTEMPTS: usize = 128;

    let pid = std::process::id();
    // A clock set before the epoch falls back to 0; uniqueness then rests on
    // the pid and sequence number alone.
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };

    let mut attempts = 0;
    while attempts < MAX_ATTEMPTS {
        attempts += 1;
        let seq = TMP_SEQ.fetch_add(1, Ordering::Relaxed);
        let candidate = parent.join(format!(".{file_name}.tmp.{pid}.{nanos}.{seq}"));
        let opened = std::fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&candidate);
        match opened {
            Ok(handle) => return Ok((handle, candidate)),
            Err(err) if err.kind() != std::io::ErrorKind::AlreadyExists => return Err(err),
            // Name already taken: try the next sequence number.
            Err(_) => {}
        }
    }

    Err(std::io::Error::new(
        std::io::ErrorKind::AlreadyExists,
        "could not allocate a unique dbmd temp file",
    ))
}
662
/// Best-effort sync of a directory: open `parent` and call `sync_all` on it.
/// Failure to open or to sync is deliberately ignored.
fn sync_parent_dir(parent: &Path) {
    let _ = std::fs::File::open(parent).and_then(|dir| dir.sync_all());
}
668
669/// Extract every wiki-link from a body (and inline frontmatter), returning the
670/// structured [`WikiLink`] stream with short-form / `.md`-extension flags and
671/// `(file, line, col)` locations set.
672pub fn extract_wiki_links(body: &str, file: &Path) -> Vec<WikiLink> {
673    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
674    let re = RE.get_or_init(|| {
675        // [[target]] or [[target|display]]; target/display exclude brackets and
676        // (for target) the `|` separator so nested forms don't over-match.
677        regex::Regex::new(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]*))?\]\]").expect("valid wiki-link regex")
678    });
679
680    let mut out = Vec::new();
681    for (line_idx, line) in body.lines().enumerate() {
682        for caps in re.captures_iter(line) {
683            let whole = caps.get(0).expect("group 0 always present");
684            let target = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
685            let display = caps.get(2).map(|m| m.as_str().to_string());
686            out.push(WikiLink {
687                is_full_path: target_is_full_path(&target),
688                has_md_extension: target_has_md_extension(&target),
689                target,
690                display,
691                location: (
692                    file.to_path_buf(),
693                    (line_idx as u32) + 1,
694                    char_column(line, whole.start()),
695                ),
696            });
697        }
698    }
699    out
700}
701
702/// Extract every standard markdown link `[text](url)` from a body into a
703/// separate stream, kept distinct from wiki-links.
704pub fn extract_markdown_links(body: &str, file: &Path) -> Vec<MarkdownLink> {
705    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
706    let re = RE.get_or_init(|| {
707        // [text](url). `text` excludes brackets so a wiki-link `[[x]]` (which
708        // has `]]`, not `](`) never matches; `url` excludes `)` and whitespace.
709        regex::Regex::new(r"\[([^\[\]]*)\]\(([^)\s]*)\)").expect("valid markdown-link regex")
710    });
711
712    let mut out = Vec::new();
713    for (line_idx, line) in body.lines().enumerate() {
714        for caps in re.captures_iter(line) {
715            let whole = caps.get(0).expect("group 0 always present");
716            out.push(MarkdownLink {
717                text: caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(),
718                url: caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(),
719                location: (
720                    file.to_path_buf(),
721                    (line_idx as u32) + 1,
722                    char_column(line, whole.start()),
723                ),
724            });
725        }
726    }
727    out
728}
729
730/// Detect the frontmatter wiki-link-list mis-encoding: a wiki-link *list*
731/// written so YAML parses it as nested sequences instead of a clean list of
732/// strings. Returns the offending keys so validate can emit
733/// `WIKI_LINK_FLOW_FORM_LIST`.
734///
735/// The subtlety is that `[[x]]` is YAML for "a list containing `[x]`", so the
736/// shapes nest:
737///
738/// - **Scalar inline** `company: [[records/x]]` → `Seq[ Seq[String] ]`
739///   (double-nested). This is the spec's scalar wiki-link form — NOT flagged.
740/// - **Flow list** `attendees: [[[a]], [[b]]]` → `Seq[ Seq[Seq[String]], … ]`
741///   (triple-nested). The list mis-encoding — flagged.
742/// - **Unquoted block list** (`- [[a]]` per line) → also triple-nested, so it
743///   is flagged too; the canonical list form must quote each item
744///   (`- "[[a]]"`), which parses to a clean `Seq[String, …]` and is NOT flagged.
745///
746/// So the discriminator is nesting depth: a *list* mis-encoding has at least one
747/// item that is itself a sequence-of-sequences, whereas a scalar inline link's
748/// single item is a sequence-of-scalars.
749pub fn detect_flow_form_link_lists(frontmatter_yaml: &str) -> Vec<String> {
750    let value: Value = match serde_norway::from_str(frontmatter_yaml) {
751        Ok(v) => v,
752        // Malformed YAML is FM_MALFORMED_YAML's job, not ours; report nothing.
753        Err(_) => return Vec::new(),
754    };
755    let Value::Mapping(map) = value else {
756        return Vec::new();
757    };
758
759    let mut out = Vec::new();
760    for (k, v) in &map {
761        if let Value::Sequence(items) = v {
762            // Triple-nesting: some outer item is a sequence that itself holds a
763            // sequence. Scalar inline `[[x]]` is only double-nested, so it
764            // never matches.
765            let is_link_list = items.iter().any(|item| match item {
766                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
767                _ => false,
768            });
769            if is_link_list {
770                if let Some(key) = k.as_str() {
771                    out.push(key.to_string());
772                }
773            }
774        }
775    }
776    out
777}
778
779/// Extract the `##`/`###` sections of a markdown body into a flat list with
780/// body slices.
781pub fn extract_sections(body: &str) -> Vec<Section> {
782    // Keep each line's start so we can slice the body verbatim (exact newlines).
783    let lines: Vec<&str> = body.split_inclusive('\n').collect();
784
785    // First pass: classify heading levels (0 = not a heading), honoring fenced
786    // code blocks so a `## x` inside a ``` fence is not treated as a heading.
787    let mut levels: Vec<u8> = Vec::with_capacity(lines.len());
788    let mut fence: Option<(u8, usize)> = None;
789    for line in &lines {
790        let content = line.trim_end_matches(['\n', '\r']);
791        if let Some(f) = fence {
792            if is_closing_fence(content, f) {
793                fence = None;
794            }
795            levels.push(0);
796            continue;
797        }
798        if let Some(opened) = opening_fence(content) {
799            fence = Some(opened);
800            levels.push(0);
801            continue;
802        }
803        levels.push(heading_level(content));
804    }
805
806    // Second pass: emit `##`+ headings; each section body runs from its heading
807    // line to the next heading at an equal-or-shallower level (exclusive).
808    let mut sections = Vec::new();
809    for (i, &lvl) in levels.iter().enumerate() {
810        if lvl < 2 {
811            continue;
812        }
813        let heading_line = lines[i].trim_end_matches(['\n', '\r']);
814        let heading = heading_text(heading_line, lvl);
815
816        let mut end = lines.len();
817        for (j, &other) in levels.iter().enumerate().skip(i + 1) {
818            if other != 0 && other <= lvl {
819                end = j;
820                break;
821            }
822        }
823
824        sections.push(Section {
825            heading,
826            level: lvl,
827            line: (i + 1) as u32,
828            body: lines[i..end].concat(),
829        });
830    }
831    sections
832}
833
834/// Parse a store's `DB.md` file into a [`Config`]: the `## Agent instructions`
835/// prose, `## Policies` (`### Frozen pages` + `### Ignored types`), and
836/// `## Schemas` (`### <type>` field-bullet blocks). Unrecognized sections are
837/// ignored; absent sections leave their [`Config`] fields at default.
838pub fn parse_db_md(text: &str, file: &Path) -> Result<Config, ParseError> {
839    // The structured sections live in the body (after frontmatter). DB.md must
840    // still start with a valid `---` block (`type: db-md`); if it's missing we
841    // surface MissingFrontmatter like any other file.
842    let parsed = split_frontmatter(text, file)?;
843    let _frontmatter = Frontmatter::parse(&parsed.frontmatter_yaml, file)?;
844    let sections = extract_sections(&parsed.body);
845
846    let mut config = Config::default();
847    // Track which H2 region each H3 belongs to as we walk the flat list.
848    let mut current_h2: Option<String> = None;
849
850    for section in &sections {
851        match section.level {
852            2 => {
853                let name = section.heading.trim().to_ascii_lowercase();
854                current_h2 = Some(name.clone());
855                if name == "agent instructions" {
856                    let prose = section_prose(&section.body);
857                    if !prose.is_empty() {
858                        config.agent_instructions = Some(prose);
859                    }
860                }
861            }
862            3 => {
863                let h2 = current_h2.as_deref().unwrap_or("");
864                let h3 = section.heading.trim().to_ascii_lowercase();
865                match (h2, h3.as_str()) {
866                    ("policies", "frozen pages") => {
867                        config.frozen_pages = bullet_lines(&section.body)
868                            .into_iter()
869                            .map(|b| PathBuf::from(extract_path_bullet(&b)))
870                            .collect();
871                    }
872                    ("policies", "ignored types") => {
873                        config.ignored_types = bullet_lines(&section.body)
874                            .into_iter()
875                            .flat_map(|b| extract_type_list_bullet(&b))
876                            .collect();
877                    }
878                    ("schemas", _) => {
879                        // The H3 heading text (as written) is the type name.
880                        let type_name = section.heading.trim().to_string();
881                        let mut schema = Schema::default();
882                        for b in bullet_lines(&section.body) {
883                            match parse_schema_bullet(&b) {
884                                SchemaBullet::Field(f) => schema.fields.push(f),
885                                SchemaBullet::Unique(k) if !k.is_empty() => {
886                                    schema.unique_keys.push(k)
887                                }
888                                SchemaBullet::SummaryTemplate(t) if !t.is_empty() => {
889                                    schema.summary_template = Some(t)
890                                }
891                                // Empty directive (`- unique:` with no fields) — ignore.
892                                SchemaBullet::Unique(_) | SchemaBullet::SummaryTemplate(_) => {}
893                            }
894                        }
895                        config.schemas.insert(type_name, schema);
896                    }
897                    _ => {}
898                }
899            }
900            _ => {}
901        }
902    }
903
904    Ok(config)
905}
906
/// One classified bullet inside a `### <type>` schema block, as returned by
/// [`parse_schema_bullet`]: an ordinary field, or one of the two reserved
/// directives (`unique:` / `summary_template:`).
///
/// The directive reading takes precedence: a bullet whose text before its first
/// `:` is exactly `unique` or `summary_template` (case-insensitive) is always a
/// directive, never a field of that name.
#[derive(Debug)]
enum SchemaBullet {
    /// An ordinary `- <name> (<modifiers>)` field.
    Field(FieldSpec),
    /// `- unique: <field>[, <field> …]` — a (possibly compound) uniqueness key,
    /// one entry per non-empty comma-separated field name.
    Unique(Vec<String>),
    /// `- summary_template: <template>` — the default-`summary` pattern, with
    /// surrounding whitespace trimmed.
    SummaryTemplate(String),
}
919
920/// Classify one `## Schemas` bullet as a directive or a field. The directive
921/// forms are `- unique: a, b, …` and `- summary_template: …`; the keyword check
922/// guards against false positives — a field like `- status (enum: a, b)` has a
923/// `(` before any `:`, so its head isn't a bare reserved keyword and it parses
924/// as a [`FieldSpec`].
925fn parse_schema_bullet(bullet_line: &str) -> SchemaBullet {
926    let line = bullet_line.trim();
927    let line = line
928        .strip_prefix("- ")
929        .or_else(|| line.strip_prefix("* "))
930        .or_else(|| line.strip_prefix("+ "))
931        .or_else(|| line.strip_prefix('-'))
932        .unwrap_or(line)
933        .trim();
934
935    if let Some((head, rest)) = line.split_once(':') {
936        match head.trim().to_ascii_lowercase().as_str() {
937            "unique" => {
938                let fields = rest
939                    .split(',')
940                    .map(|f| f.trim().to_string())
941                    .filter(|f| !f.is_empty())
942                    .collect();
943                return SchemaBullet::Unique(fields);
944            }
945            "summary_template" => {
946                return SchemaBullet::SummaryTemplate(rest.trim().to_string());
947            }
948            _ => {}
949        }
950    }
951
952    SchemaBullet::Field(parse_field_spec(bullet_line))
953}
954
955/// Parse a single `## Schemas` field-bullet line — `- <name> (<modifiers>)` —
956/// into a [`FieldSpec`], capturing recognized modifiers and stashing the rest
957/// in [`FieldSpec::unknown_modifiers`].
958pub fn parse_field_spec(bullet_line: &str) -> FieldSpec {
959    // Strip the leading bullet marker (`- ` / `* ` / `+ `) and surrounding ws.
960    let line = bullet_line.trim();
961    let line = line
962        .strip_prefix("- ")
963        .or_else(|| line.strip_prefix("* "))
964        .or_else(|| line.strip_prefix("+ "))
965        .or_else(|| line.strip_prefix('-'))
966        .unwrap_or(line)
967        .trim();
968
969    // Split `<name> (<modifiers>)`. A bullet without parens is a free-form
970    // optional field of any shape — name only, no modifiers.
971    let (name, modifiers) = match line.find('(') {
972        Some(open) => {
973            let name = line[..open].trim().to_string();
974            let after = &line[open + 1..];
975            let mods = match after.rfind(')') {
976                Some(close) => &after[..close],
977                None => after, // tolerate a missing close paren
978            };
979            (name, mods.trim())
980        }
981        None => (line.to_string(), ""),
982    };
983
984    let mut spec = FieldSpec {
985        name,
986        ..FieldSpec::default()
987    };
988
989    if modifiers.is_empty() {
990        return spec;
991    }
992
993    // Modifiers are comma-separated. `enum:` is special: because its own value
994    // list contains commas, it must be last and swallows the remainder.
995    let raw: Vec<&str> = modifiers.split(',').collect();
996    let mut i = 0;
997    while i < raw.len() {
998        let token = raw[i].trim();
999        if token.is_empty() {
1000            i += 1;
1001            continue;
1002        }
1003        let lower = token.to_ascii_lowercase();
1004
1005        if lower == "required" {
1006            spec.required = true;
1007        } else if let Some(shape) = shape_from_str(&lower) {
1008            spec.shape = Some(shape);
1009        } else if let Some(rest) = lower.strip_prefix("link to ") {
1010            // The trailing slash is required in the source; store the prefix
1011            // without it so `Path::starts_with` comparisons are clean.
1012            let prefix = token["link to ".len()..].trim().trim_end_matches('/');
1013            let _ = rest; // lowercase form only used for the keyword match
1014            spec.link_prefix = Some(PathBuf::from(prefix));
1015        } else if let Some(_rest) = lower.strip_prefix("default ") {
1016            // Value is everything after the keyword on this comma-token,
1017            // preserving original case.
1018            let value = token["default ".len()..].trim().to_string();
1019            spec.default = Some(Value::String(value));
1020        } else if lower.starts_with("enum:") || lower == "enum" {
1021            // Rejoin this token and every remaining token to recover the full
1022            // comma-separated value list.
1023            let mut joined = raw[i..].join(",");
1024            // Drop the leading `enum:` keyword (case-insensitive).
1025            if let Some(colon) = joined.find(':') {
1026                joined = joined[colon + 1..].to_string();
1027            }
1028            let values: Vec<String> = joined
1029                .split(',')
1030                .map(|v| v.trim().to_string())
1031                .filter(|v| !v.is_empty())
1032                .collect();
1033            spec.enum_values = Some(values);
1034            break; // enum consumed the rest of the line
1035        } else {
1036            // Unrecognized modifier — captured verbatim, surfaced as Info.
1037            spec.unknown_modifiers.push(token.to_string());
1038        }
1039        i += 1;
1040    }
1041
1042    spec
1043}
1044
1045// ── Private helpers ─────────────────────────────────────────────────────────
1046
1047/// Parse a frontmatter timestamp value into a `DateTime<FixedOffset>`. A `null`
1048/// is treated as absent; anything else must be an RFC3339 string.
1049fn parse_timestamp(
1050    value: &Value,
1051    key: &str,
1052    file: &Path,
1053) -> Result<Option<DateTime<FixedOffset>>, ParseError> {
1054    match value {
1055        Value::Null => Ok(None),
1056        Value::String(s) => parse_rfc3339(s, key, file).map(Some),
1057        other => Err(ParseError::BadTimestamp {
1058            file: file.to_path_buf(),
1059            key: key.to_string(),
1060            value: format!("{other:?}"),
1061        }),
1062    }
1063}
1064
1065/// Parse an RFC3339 timestamp string, mapping failure to [`ParseError::BadTimestamp`].
1066fn parse_rfc3339(s: &str, key: &str, file: &Path) -> Result<DateTime<FixedOffset>, ParseError> {
1067    DateTime::parse_from_rfc3339(s.trim()).map_err(|_| ParseError::BadTimestamp {
1068        file: file.to_path_buf(),
1069        key: key.to_string(),
1070        value: s.to_string(),
1071    })
1072}
1073
1074/// Read a `tags` value into a flat `Vec<String>`. Accepts a sequence of scalars
1075/// (the canonical form) or a single scalar (coerced to a one-element list).
1076fn parse_tags(value: &Value) -> Vec<String> {
1077    match value {
1078        Value::Sequence(items) => items
1079            .iter()
1080            .filter_map(|v| match v {
1081                Value::String(s) => Some(s.clone()),
1082                Value::Number(n) => Some(n.to_string()),
1083                Value::Bool(b) => Some(b.to_string()),
1084                _ => None,
1085            })
1086            .collect(),
1087        Value::String(s) => vec![s.clone()],
1088        _ => Vec::new(),
1089    }
1090}
1091
1092/// Parse a single `[[target|display]]` string into a [`WikiLink`] with no
1093/// location, or `None` if the string is not a bare wiki-link. Used for
1094/// frontmatter-valued links where there is no body position to report.
1095fn parse_wiki_link_str(s: &str) -> Option<WikiLink> {
1096    let s = s.trim();
1097    let inner = s.strip_prefix("[[")?.strip_suffix("]]")?;
1098    // Reject anything with further brackets (e.g. the nested flow-form item),
1099    // which is not a clean single wiki-link.
1100    if inner.contains('[') || inner.contains(']') {
1101        return None;
1102    }
1103    let (target, display) = match inner.split_once('|') {
1104        Some((t, d)) => (t.to_string(), Some(d.to_string())),
1105        None => (inner.to_string(), None),
1106    };
1107    Some(WikiLink {
1108        is_full_path: target_is_full_path(&target),
1109        has_md_extension: target_has_md_extension(&target),
1110        target,
1111        display,
1112        location: (PathBuf::new(), 0, 0),
1113    })
1114}
1115
1116/// Extract every wiki-link from a single frontmatter field value, accepting the
1117/// two canonical forms the spec defines (SPEC § Linking):
1118///
1119/// - a **scalar** wiki-link field, in either the quoted (`f: "[[x]]"`) or the
1120///   canonical unquoted inline (`f: [[x]]`) form, and
1121/// - a **list** field whose items are quoted wiki-link strings
1122///   (`- "[[x]]"`).
1123///
1124/// YAML eats the brackets of an unquoted `[[x]]`, leaving a flow-list-in-a-list,
1125/// so the parsed [`Value`] shapes are not what one would naively expect:
1126///
1127/// | source                         | parsed `Value`                     | here |
1128/// |--------------------------------|------------------------------------|------|
1129/// | `f: "[[x]]"`       (quoted)    | `String("[[x]]")`                  | link |
1130/// | `f: [[x]]`         (unquoted)  | `Seq[ Seq[String("x")] ]`          | link |
1131/// | `f:`\n`  - "[[x]]"`(quoted)    | `Seq[ String("[[x]]"), … ]`        | link |
1132/// | `f:`\n`  - [[x]]`  (unquoted)  | `Seq[ Seq[Seq[String("x")]], … ]`  | —    |
1133///
1134/// The last row — an *unquoted list* — parses identically to the flow-form list
1135/// `f: [[a], [b]]` and is a mis-encoding the canonical writer never emits;
1136/// `dbmd validate` reports it as `WIKI_LINK_FLOW_FORM_LIST` (see
1137/// [`detect_flow_form_link_lists`]). It is deliberately NOT surfaced here, so an
1138/// edge enumerator only ever sees the valid canonical forms.
1139///
1140/// The unquoted scalar (`Seq[Seq[String]]`, one element) is told apart from a
1141/// plain one-item flow list (`f: [x]` → `Seq[String]`, one fewer nesting level)
1142/// by [`unquoted_inline_link`] requiring its argument to be a `Sequence`.
1143fn links_in_field_value(value: &Value) -> Vec<WikiLink> {
1144    // Quoted scalar: `field: "[[x]]"`.
1145    if let Value::String(s) = value {
1146        return parse_wiki_link_str(s).into_iter().collect();
1147    }
1148    let Value::Sequence(items) = value else {
1149        return Vec::new();
1150    };
1151    // Unquoted scalar inline form `field: [[x]]` → `Seq[ Seq[String(x)] ]`.
1152    // (A quoted single-item list `["[[x]]"]` is `Seq[String]`, so its lone item
1153    // is a `String`, not a `Sequence`, and falls through to the list path below.)
1154    if items.len() == 1 {
1155        if let Some(link) = unquoted_inline_link(&items[0]) {
1156            return vec![link];
1157        }
1158    }
1159    // Otherwise a list of quoted wiki-link strings; non-string items (the
1160    // unquoted-list mis-encoding) are left for validate to flag.
1161    items
1162        .iter()
1163        .filter_map(|item| parse_wiki_link_str(item.as_str()?))
1164        .collect()
1165}
1166
1167/// Canonicalize one `extra` frontmatter value for emission by [`Frontmatter::to_yaml`].
1168///
1169/// The read path ([`Frontmatter::parse`]) stores every unknown key's raw parsed
1170/// [`Value`] verbatim, so a SPEC-canonical *unquoted* inline scalar wiki-link
1171/// (`company: [[records/companies/northstar]]`) lands in `extra` as the nested
1172/// shape YAML produces for it — `Seq[ Seq[String("records/companies/northstar")] ]`.
1173/// Re-emitting that verbatim yields the block sequence
1174///
1175/// ```text
1176/// company:
1177/// - - records/companies/northstar
1178/// ```
1179///
1180/// which has lost the `[[ ]]` brackets entirely: the link is destroyed, and every
1181/// reader (validate, graph, backlinks) stops seeing the edge. This normalizes such
1182/// a value back into the canonical emitted form before it is written:
1183///
1184/// - a **scalar** wiki-link (quoted `String("[[x]]")` or unquoted `Seq[Seq[String]]`,
1185///   one element) → a quoted scalar `Value::String("[[x]]")`, which serde_norway emits
1186///   inline as `'[[x]]'` — the form the finding confirms survives a round-trip and
1187///   that [`links_in_field_value`] reads back as the same scalar link;
1188/// - a **list** of wiki-links (in any spelling [`links_in_field_value`] accepts) →
1189///   a block `Value::Sequence` of quoted-link strings (`- "[[x]]"`), matching the
1190///   `set` write-in path and the canonical list form;
1191/// - everything else → returned verbatim (the common no-op for non-link values).
1192///
1193/// `|display` is preserved in both link branches. This is the single point that
1194/// keeps all three curator-loop writers (`format`, `fm set`, `link`) from
1195/// corrupting a pre-existing canonical link, since they all funnel through
1196/// `to_yaml`.
1197fn canonicalize_extra_value(value: &Value) -> Value {
1198    match value {
1199        // Scalar wiki-link, quoted form: `field: "[[x]]"` → `String("[[x]]")`.
1200        // Re-emit as a quoted scalar so it stays a string (never the brackets-as-
1201        // YAML nested sequence). Non-link strings are returned untouched.
1202        Value::String(s) => match parse_wiki_link_str(s) {
1203            Some(link) => Value::String(wiki_link_literal(&link)),
1204            None => value.clone(),
1205        },
1206        Value::Sequence(items) => {
1207            // Scalar wiki-link, unquoted inline form: `field: [[x]]` parses to a
1208            // one-element `Seq[ Seq[String(x)] ]`. Collapse back to the quoted
1209            // scalar string so the link is preserved rather than block-emitted.
1210            if items.len() == 1 {
1211                if let Some(link) = unquoted_inline_link(&items[0]) {
1212                    return Value::String(wiki_link_literal(&link));
1213                }
1214            }
1215            // List of wiki-links: re-emit as a block sequence of quoted-link
1216            // strings, the canonical list form `to_yaml` renders block-style and
1217            // `links_in_field_value` accepts. Only canonicalize when *every* item
1218            // is a clean single wiki-link; a list with any non-link item is left
1219            // verbatim so unrelated sequences (and the unquoted-list mis-encoding
1220            // validate flags) are untouched.
1221            let mut links = Vec::with_capacity(items.len());
1222            for item in items {
1223                match link_from_flow_list_item(item) {
1224                    Some(link) => links.push(link),
1225                    None => return value.clone(),
1226                }
1227            }
1228            if links.is_empty() {
1229                return value.clone();
1230            }
1231            Value::Sequence(
1232                links
1233                    .iter()
1234                    .map(|l| Value::String(wiki_link_literal(l)))
1235                    .collect(),
1236            )
1237        }
1238        // Mappings, scalars other than strings, nulls: nothing to canonicalize.
1239        _ => value.clone(),
1240    }
1241}
1242
1243/// Render a [`WikiLink`] back to its `[[target]]` / `[[target|display]]` literal,
1244/// the inner form the canonical writer emits and `links_in_field_value` accepts.
1245fn wiki_link_literal(link: &WikiLink) -> String {
1246    match &link.display {
1247        Some(d) => format!("[[{}|{}]]", link.target, d),
1248        None => format!("[[{}]]", link.target),
1249    }
1250}
1251
1252/// Recognize the inner token of an unquoted scalar `[[x]]`: after YAML strips the
1253/// outer brackets, the inner `[x]` is a single-element sequence `Seq[String(x)]`.
1254/// Reconstructs `[[x]]` (preserving any `|display`) and parses it, or returns
1255/// `None` when `v` is not that shape. Requiring a `Sequence` here is what keeps a
1256/// plain one-item flow list (`field: [x]` → `Seq[String]`, not `Seq[Seq[String]]`)
1257/// from being mistaken for a wiki-link.
1258fn unquoted_inline_link(v: &Value) -> Option<WikiLink> {
1259    let Value::Sequence(items) = v else {
1260        return None;
1261    };
1262    if items.len() != 1 {
1263        return None;
1264    }
1265    let s = items[0].as_str()?;
1266    // A clean unquoted wiki-link has no further brackets inside it.
1267    if s.contains('[') || s.contains(']') {
1268        return None;
1269    }
1270    parse_wiki_link_str(&format!("[[{s}]]"))
1271}
1272
1273/// Decide whether a `dbmd fm set` / `--fm` value string is a **list of
1274/// wiki-links** that should be stored as a YAML block sequence, returning the
1275/// canonical `Value::Sequence` of quoted-link strings when so.
1276///
1277/// The value path of every write surface stringifies its argument; without this
1278/// a required list-of-links field (`meeting.attendees`) was unwritable in valid
1279/// form — passing `[[[a]], [[b]]]` stored a single scalar string that mis-parses
1280/// and trips `WIKI_LINK_FLOW_FORM_LIST` / `WIKI_LINK_BROKEN`. This recognizes the
1281/// two list spellings an agent naturally types and normalizes both to the block
1282/// form the canonical writer emits and `dbmd validate` accepts:
1283///
1284/// - flow list of quoted links — `["[[a]]", "[[b]]"]`
1285/// - flow list of unquoted links — `[[[a]], [[b]]]` (YAML: `Seq[Seq[String], …]`)
1286///
1287/// Returns `None` (⇒ caller stores a verbatim scalar string) for everything that
1288/// is not unambiguously a list of clean wiki-links — plain text, a single inline
1289/// `[[x]]` (YAML reads it as a one-item `Seq[Seq[String]]`, kept scalar so it
1290/// renders inline), an empty list, or a list with any non-link item. A single
1291/// link must stay scalar; only genuine multi-item-or-explicit lists become
1292/// sequences, matching `links_in_field_value`'s acceptance rule so writer and
1293/// validator never disagree.
1294fn parse_link_list_value(value: &str) -> Option<Value> {
1295    let trimmed = value.trim();
1296    // Only a YAML *flow sequence* literal is a list candidate; anything not
1297    // wrapped in `[ … ]` is a scalar (a bare `[[x]]` is wrapped, and handled by
1298    // the single-inline-link guard below).
1299    if !(trimmed.starts_with('[') && trimmed.ends_with(']')) {
1300        return None;
1301    }
1302    let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(trimmed) else {
1303        return None;
1304    };
1305    // A single inline `[[x]]` parses to `Seq[ Seq[String(x)] ]` (one item, itself
1306    // a sequence) — that is the unquoted *scalar* form, not a list. Keep it scalar
1307    // so it round-trips to the inline `field: [[x]]` rather than a one-item block
1308    // list. `links_in_field_value` reads it back as a scalar link either way.
1309    if items.len() == 1 && unquoted_inline_link(&items[0]).is_some() {
1310        return None;
1311    }
1312    // Every item must resolve to exactly one clean wiki-link, in any of the flow
1313    // spellings an agent types (see [`link_from_flow_list_item`]).
1314    let mut links = Vec::with_capacity(items.len());
1315    for item in &items {
1316        links.push(link_from_flow_list_item(item)?);
1317    }
1318    if links.is_empty() {
1319        return None;
1320    }
1321    // Normalize to a block sequence of quoted-link strings — the form `to_yaml`
1322    // renders block-style and `links_in_field_value` accepts. `|display` is
1323    // preserved.
1324    let normalized = links
1325        .iter()
1326        .map(|l| Value::String(wiki_link_literal(l)))
1327        .collect();
1328    Some(Value::Sequence(normalized))
1329}
1330
1331/// Recognize one clean wiki-link from a single **item** of a YAML flow sequence,
1332/// across the spellings an agent types for a list. After top-level flow parsing,
1333/// a list item arrives in one of:
1334///
1335/// - quoted — `"[[x]]"` ⇒ `String("[[x]]")`
1336/// - unquoted in a flow list — `[[x]]` inside `[…]` ⇒ `Seq[ Seq[String(x)] ]`
1337///   (one level deeper than a bare unquoted scalar, because the surrounding list
1338///   adds a wrapper); unwrap the single-element wrapper, then read the inline
1339///   `Seq[String(x)]` with [`unquoted_inline_link`].
1340///
1341/// Returns `None` for any item that is not exactly one clean wiki-link, so the
1342/// caller falls back to a scalar string and never fabricates a partial list.
1343fn link_from_flow_list_item(item: &Value) -> Option<WikiLink> {
1344    match item {
1345        Value::String(s) => parse_wiki_link_str(s),
1346        Value::Sequence(inner) => {
1347            // Unquoted list item `[[x]]` → `Seq[ Seq[String(x)] ]`: peel the lone
1348            // wrapper to expose the inline-link shape.
1349            if inner.len() == 1 {
1350                if let Some(link) = unquoted_inline_link(&inner[0]) {
1351                    return Some(link);
1352                }
1353            }
1354            // Defensive: also accept the inline-link shape directly.
1355            unquoted_inline_link(item)
1356        }
1357        _ => None,
1358    }
1359}
1360
1361/// A target is a full store-relative path when its first path segment is one of
1362/// the three canonical layer dirs and at least one `/` separator follows. A
1363/// trailing `.md` does not affect this classification.
1364fn target_is_full_path(target: &str) -> bool {
1365    let target = target.trim();
1366    match target.split_once('/') {
1367        Some((head, _rest)) => LAYER_DIRS.contains(&head),
1368        None => false,
1369    }
1370}
1371
/// Whether the target, ignoring surrounding whitespace, ends in `.md`. Validate
/// uses this to warn `WIKI_LINK_HAS_EXTENSION`.
fn target_has_md_extension(target: &str) -> bool {
    target.trim().strip_suffix(".md").is_some()
}
1377
/// Translate a byte offset within `line` into a 1-based column counted in
/// characters (Unicode scalar values).
///
/// `byte_offset` must lie on a character boundary no greater than
/// `line.len()`; otherwise the slice below panics.
fn char_column(line: &str, byte_offset: usize) -> u32 {
    let preceding_chars = line[..byte_offset].chars().count();
    preceding_chars as u32 + 1
}
1382
1383/// Map a lowercase shape keyword to its [`Shape`].
1384fn shape_from_str(s: &str) -> Option<Shape> {
1385    match s {
1386        "string" => Some(Shape::String),
1387        "int" => Some(Shape::Int),
1388        "bool" => Some(Shape::Bool),
1389        "date" => Some(Shape::Date),
1390        "email" => Some(Shape::Email),
1391        "currency" => Some(Shape::Currency),
1392        "url" => Some(Shape::Url),
1393        _ => None,
1394    }
1395}
1396
/// ATX heading level of `line`: the length of its leading `#` run, or 0 when
/// the line is not a heading.
///
/// A heading may be indented by at most three spaces, has a run of one to six
/// `#`, and the run must be followed by a space, a tab, or the end of the
/// line (so `#hashtag` is not a heading).
fn heading_level(line: &str) -> u8 {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return 0;
    }

    let after_run = unindented.trim_start_matches('#');
    let hashes = unindented.len() - after_run.len();

    let run_in_range = (1..=6).contains(&hashes);
    let run_terminated = after_run.is_empty() || after_run.starts_with([' ', '\t']);
    if run_in_range && run_terminated {
        hashes as u8
    } else {
        0
    }
}
1417
/// The heading text after the opening `#` run, trimmed, with an optional ATX
/// closing sequence removed (`## Title ##` → `Title`).
///
/// `level` is the length of the opening `#` run, as reported by
/// `heading_level` for the same line.
///
/// Following CommonMark, a trailing `#` run is only a closing sequence when
/// it is preceded by a space or tab, or when it is the whole heading
/// content. A `#` glued to the text is part of the title, so `## C#` yields
/// `C#` rather than `C`.
///
/// If `level` does not fit the line (offset past the end or inside a
/// multi-byte character) the result is an empty string rather than a panic.
fn heading_text(line: &str, level: u8) -> String {
    let indent = line.len() - line.trim_start_matches(' ').len();
    let after_hashes = line.get(indent + usize::from(level)..).unwrap_or("");
    let trimmed = after_hashes.trim();
    let stem = trimmed.trim_end_matches('#');
    let had_trailing_hashes = stem.len() != trimmed.len();
    // The closing run must be separated from the text by whitespace.
    let separated = stem.is_empty() || stem.ends_with(' ') || stem.ends_with('\t');
    if had_trailing_hashes && separated {
        stem.trim_end().to_string()
    } else {
        trimmed.to_string()
    }
}
1431
/// Detect a fenced-code-block opener.
///
/// Returns `(fence byte, run length)` when `line`, indented by at most three
/// spaces, starts with a run of three or more backticks or tildes. A backtick
/// fence is rejected when the text after the run (its info string) contains
/// another backtick.
fn opening_fence(line: &str) -> Option<(u8, usize)> {
    let body = line.trim_start_matches(' ');
    if line.len() - body.len() > 3 {
        return None;
    }
    let marker = match body.as_bytes().first() {
        Some(&b) if b == b'`' || b == b'~' => b,
        _ => return None,
    };
    let info = body.trim_start_matches(marker as char);
    let run = body.len() - info.len();
    let backtick_in_info = marker == b'`' && info.contains('`');
    if run < 3 || backtick_in_info {
        None
    } else {
        Some((marker, run))
    }
}
1453
/// Decide whether `line` closes the open fence described by `fence`
/// (`(fence byte, opening run length)`).
///
/// The line must be indented by at most three spaces, start with a run of the
/// same fence character at least as long as the opening run, and carry
/// nothing but whitespace after that run.
fn is_closing_fence(line: &str, fence: (u8, usize)) -> bool {
    let (marker, open_len) = fence;
    let body = line.trim_start_matches(' ');
    let tail = body.trim_start_matches(marker as char);
    let indent_ok = line.len() - body.len() <= 3;
    let run = body.len() - tail.len();
    indent_ok && run >= open_len && tail.trim().is_empty()
}
1469
/// Prose of a section: whatever follows the first line (the heading), with
/// surrounding whitespace trimmed. A body with no newline has no prose and
/// yields an empty string.
fn section_prose(section_body: &str) -> String {
    section_body
        .find('\n')
        .map(|newline| section_body[newline + 1..].trim().to_string())
        .unwrap_or_default()
}
1477
/// Collect the bullet lines of a section body.
///
/// The first line (the heading) is skipped. Every remaining line is trimmed
/// on both sides and kept only when it starts with a `- `, `* ` or `+ `
/// marker. The marker stays part of the returned string.
fn bullet_lines(section_body: &str) -> Vec<String> {
    const MARKERS: [&str; 3] = ["- ", "* ", "+ "];
    let mut bullets = Vec::new();
    for raw in section_body.lines().skip(1) {
        let line = raw.trim();
        if MARKERS.iter().any(|marker| line.starts_with(*marker)) {
            bullets.push(line.to_string());
        }
    }
    bullets
}
1489
/// Drop a trailing comment from a bullet's content.
///
/// The content is cut at the earliest occurrence of any space-delimited
/// separator (em dash ` — `, double hyphen ` -- `, or en dash ` – `) and the
/// remaining prefix is returned trimmed. Without a separator the whole
/// content is returned, trimmed.
fn strip_bullet_comment(content: &str) -> &str {
    let cut = [" — ", " -- ", " – "]
        .iter()
        .filter_map(|separator| content.find(*separator))
        .min()
        .unwrap_or(content.len());
    content[..cut].trim()
}
1501
/// Content of a bullet line with its marker removed.
///
/// The line is trimmed, the first matching marker among `- `, `* ` and `+ `
/// is stripped, and the remainder is trimmed again. A line without a marker
/// is returned trimmed but otherwise unchanged.
fn bullet_content(bullet: &str) -> &str {
    let line = bullet.trim();
    for marker in ["- ", "* ", "+ "] {
        if let Some(rest) = line.strip_prefix(marker) {
            return rest.trim();
        }
    }
    line
}
1511
1512/// Extract a store-relative path from a Frozen-pages bullet. The path may be
1513/// wrapped in backticks and followed by an em-dash comment.
1514fn extract_path_bullet(bullet: &str) -> String {
1515    let content = bullet_content(bullet);
1516    // Prefer a backtick-delimited span if present.
1517    if let Some(start) = content.find('`') {
1518        if let Some(end_rel) = content[start + 1..].find('`') {
1519            return content[start + 1..start + 1 + end_rel].trim().to_string();
1520        }
1521    }
1522    // Otherwise take the text up to a comment separator, stripping quotes.
1523    strip_bullet_comment(content)
1524        .trim_matches('"')
1525        .trim_matches('\'')
1526        .trim()
1527        .to_string()
1528}
1529
1530/// Extract a comma-separated type list from an Ignored-types bullet, stripping
1531/// backticks/quotes and any trailing em-dash comment.
1532fn extract_type_list_bullet(bullet: &str) -> Vec<String> {
1533    let content = strip_bullet_comment(bullet_content(bullet));
1534    content
1535        .split(',')
1536        .map(|t| {
1537            t.trim()
1538                .trim_matches('`')
1539                .trim_matches('"')
1540                .trim_matches('\'')
1541                .trim()
1542                .to_string()
1543        })
1544        .filter(|t| !t.is_empty())
1545        .collect()
1546}
1547
1548#[cfg(test)]
1549mod tests {
1550    use super::*;
1551    use std::path::Path;
1552    use tempfile::tempdir;
1553
1554    // ── Config::frozen_match (the single write-surface policy matcher) ───────
1555
1556    #[test]
1557    fn frozen_match_is_md_insensitive_both_directions() {
1558        // A policy entry stored WITHOUT `.md` (the natural extensionless
1559        // spelling `parse_db_md` keeps verbatim) must still match a `.md`
1560        // write target — the regression every write surface had.
1561        let cfg = Config {
1562            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1563            ..Config::default()
1564        };
1565        assert_eq!(
1566            cfg.frozen_match(Path::new("records/decisions/q1.md")),
1567            Some(PathBuf::from("records/decisions/q1")),
1568            "extensionless policy entry must freeze the .md file"
1569        );
1570        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1571
1572        // The symmetric case: a policy entry WITH `.md` matches a bare target.
1573        let cfg = Config {
1574            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1575            ..Config::default()
1576        };
1577        assert_eq!(
1578            cfg.frozen_match(Path::new("records/decisions/q1")),
1579            Some(PathBuf::from("records/decisions/q1.md")),
1580        );
1581        // And the same-spelling cases still match.
1582        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1583    }
1584
1585    #[test]
1586    fn frozen_match_drops_leading_dot_slash() {
1587        let cfg = Config {
1588            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1589            ..Config::default()
1590        };
1591        assert!(cfg.is_frozen(Path::new("./records/decisions/q1.md")));
1592        assert!(cfg.is_frozen(Path::new("./records/decisions/q1")));
1593    }
1594
1595    #[test]
1596    fn frozen_match_returns_none_for_unlisted_and_prefix_paths() {
1597        let cfg = Config {
1598            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1599            ..Config::default()
1600        };
1601        assert!(cfg
1602            .frozen_match(Path::new("records/decisions/q2.md"))
1603            .is_none());
1604        // A prefix is not a match: `q1` must not freeze `q1-draft`.
1605        assert!(cfg
1606            .frozen_match(Path::new("records/decisions/q1-draft.md"))
1607            .is_none());
1608        assert!(!cfg.is_frozen(Path::new("records/decisions/q11.md")));
1609    }
1610
1611    // ── split_frontmatter ───────────────────────────────────────────────────
1612
1613    #[test]
1614    fn split_frontmatter_separates_yaml_and_verbatim_body() {
1615        let text = "---\ntype: contact\nsummary: x\n---\n# Heading\n\nBody line.\n";
1616        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1617        assert_eq!(p.frontmatter_yaml, "type: contact\nsummary: x\n");
1618        // Body is everything after the closing fence's newline, byte-for-byte.
1619        assert_eq!(p.body, "# Heading\n\nBody line.\n");
1620    }
1621
1622    #[test]
1623    fn split_frontmatter_preserves_body_without_trailing_newline() {
1624        let text = "---\ntype: x\n---\nno trailing newline";
1625        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1626        assert_eq!(p.body, "no trailing newline");
1627    }
1628
1629    #[test]
1630    fn split_frontmatter_empty_body_when_nothing_after_fence() {
1631        let text = "---\ntype: x\n---\n";
1632        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1633        assert_eq!(p.body, "");
1634    }
1635
1636    #[test]
1637    fn split_frontmatter_missing_opening_fence_errors() {
1638        let text = "# No frontmatter here\ntype: x\n";
1639        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1640        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1641    }
1642
1643    #[test]
1644    fn split_frontmatter_leading_content_before_fence_rejected() {
1645        // The opening fence must be the very first line; a blank line first is
1646        // not allowed.
1647        let text = "\n---\ntype: x\n---\nbody";
1648        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1649        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1650    }
1651
1652    #[test]
1653    fn split_frontmatter_unterminated_block_errors() {
1654        let text = "---\ntype: x\nsummary: y\n";
1655        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1656        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1657    }
1658
1659    // ── Frontmatter::parse ───────────────────────────────────────────────────
1660
1661    #[test]
1662    fn parse_populates_typed_fields_and_routes_unknowns_to_extra() {
1663        let yaml = "type: contact\nid: sarah-chen\nsummary: Director of Ops\nstatus: active\ntags: [vip, renewal]\nemail: sarah@northstar.io\nrole: Director";
1664        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1665        assert_eq!(fm.type_.as_deref(), Some("contact"));
1666        assert_eq!(fm.id.as_deref(), Some("sarah-chen"));
1667        assert_eq!(fm.summary.as_deref(), Some("Director of Ops"));
1668        assert_eq!(fm.status.as_deref(), Some("active"));
1669        assert_eq!(fm.tags, vec!["vip".to_string(), "renewal".to_string()]);
1670        // Type-specific fields are NOT promoted to typed slots.
1671        assert!(fm.type_.is_some() && !fm.extra.contains_key("type"));
1672        assert!(!fm.extra.contains_key("tags"));
1673        assert_eq!(
1674            fm.extra.get("email").and_then(|v| v.as_str()),
1675            Some("sarah@northstar.io")
1676        );
1677        assert_eq!(
1678            fm.extra.get("role").and_then(|v| v.as_str()),
1679            Some("Director")
1680        );
1681    }
1682
1683    #[test]
1684    fn parse_reads_rfc3339_timestamps() {
1685        let yaml =
1686            "type: email\ncreated: 2026-05-27T08:00:00-07:00\nupdated: 2026-05-28T09:30:00-07:00";
1687        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1688        let created = fm.created.expect("created parsed");
1689        // -07:00 offset is 7 * 3600 seconds west.
1690        assert_eq!(created.offset().utc_minus_local(), 7 * 3600);
1691        assert_eq!(created.to_rfc3339(), "2026-05-27T08:00:00-07:00");
1692        assert!(fm.updated.is_some());
1693    }
1694
1695    #[test]
1696    fn parse_rejects_non_rfc3339_timestamp() {
1697        // A date-only value is not a full RFC3339 timestamp; created/updated
1698        // require the full form.
1699        let yaml = "type: email\ncreated: 2026-05-27";
1700        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1701        match err {
1702            ParseError::BadTimestamp { key, value, .. } => {
1703                assert_eq!(key, "created");
1704                assert_eq!(value, "2026-05-27");
1705            }
1706            other => panic!("expected BadTimestamp, got {other:?}"),
1707        }
1708    }
1709
1710    #[test]
1711    fn parse_malformed_yaml_errors() {
1712        // Unclosed flow mapping is invalid YAML.
1713        let yaml = "type: contact\n  bad: : :\n- nope";
1714        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1715        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1716    }
1717
1718    #[test]
1719    fn parse_empty_block_is_empty_frontmatter() {
1720        let fm = Frontmatter::parse("", Path::new("f.md")).unwrap();
1721        assert_eq!(fm, Frontmatter::default());
1722    }
1723
1724    #[test]
1725    fn parse_scalar_top_level_is_malformed() {
1726        // A bare scalar at the top level is not a frontmatter mapping.
1727        let err = Frontmatter::parse("just a string", Path::new("f.md")).unwrap_err();
1728        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1729    }
1730
1731    // ── to_yaml canonical order ──────────────────────────────────────────────
1732
1733    #[test]
1734    fn to_yaml_emits_canonical_key_order() {
1735        let mut fm = Frontmatter {
1736            type_: Some("contact".into()),
1737            id: Some("sarah-chen".into()),
1738            summary: Some("Director of Ops".into()),
1739            status: Some("active".into()),
1740            tags: vec!["vip".into()],
1741            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
1742            updated: Some(DateTime::parse_from_rfc3339("2026-05-28T09:30:00-07:00").unwrap()),
1743            ..Default::default()
1744        };
1745        // Two type-specific fields, inserted in NON-alphabetical order to prove
1746        // the writer sorts them (BTreeMap) between the universal head and tail.
1747        fm.extra
1748            .insert("role".into(), Value::String("Director".into()));
1749        fm.extra.insert(
1750            "company".into(),
1751            Value::String("[[records/companies/northstar]]".into()),
1752        );
1753
1754        let yaml = fm.to_yaml();
1755        let keys: Vec<&str> = yaml
1756            .lines()
1757            .filter(|l| !l.starts_with(['-', ' ']) && l.contains(':'))
1758            .map(|l| l.split(':').next().unwrap())
1759            .collect();
1760        assert_eq!(
1761            keys,
1762            vec![
1763                "type", "id", "created", "updated", "summary", // universal head
1764                "company", "role",   // type-specific, sorted
1765                "status", // universal tail
1766                "tags",
1767            ],
1768            "canonical order violated; got:\n{yaml}"
1769        );
1770        // Timestamps round-trip as RFC3339 strings (YAML may quote them).
1771        assert!(
1772            yaml.contains("2026-05-27T08:00:00-07:00"),
1773            "created timestamp missing; got:\n{yaml}"
1774        );
1775        // The value re-parses to the same instant regardless of quoting.
1776        let reparsed = Frontmatter::parse(&yaml, Path::new("rt.md")).unwrap();
1777        assert_eq!(reparsed.created, fm.created);
1778        assert_eq!(reparsed.updated, fm.updated);
1779    }
1780
1781    #[test]
1782    fn to_yaml_omits_absent_optional_fields() {
1783        let fm = Frontmatter {
1784            type_: Some("note".into()),
1785            ..Default::default()
1786        };
1787        let yaml = fm.to_yaml();
1788        assert!(yaml.contains("type: note"));
1789        assert!(!yaml.contains("status"));
1790        assert!(!yaml.contains("tags"));
1791        assert!(!yaml.contains("summary"));
1792    }
1793
1794    #[test]
1795    fn to_yaml_preserves_unquoted_scalar_wiki_link_round_trip() {
1796        // Regression (PRIMARY): the SPEC-canonical scalar wiki-link is the
1797        // *unquoted* inline `company: [[records/companies/northstar]]`
1798        // (SPEC § Linking, the worked `contact` example). YAML parses it to the
1799        // nested `Seq[Seq[String]]` shape and `parse` stores that verbatim in
1800        // `extra`. Before the fix, `to_yaml` re-emitted it block-style as
1801        //     company:
1802        //     - - records/companies/northstar
1803        // — the `[[ ]]` brackets GONE — so a no-op re-emit (`dbmd format`, and
1804        // any `fm set` / `link` write) silently destroyed the link.
1805        let yaml = "type: contact\ncompany: [[records/companies/northstar]]";
1806        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
1807        // Sanity: it really parsed as the nested sequence, not a string.
1808        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
1809
1810        let out = fm.to_yaml();
1811        // The link must survive as a quoted inline scalar — brackets intact, and
1812        // never the bracket-less block sequence `- - records/...`.
1813        assert!(
1814            out.contains("[[records/companies/northstar]]"),
1815            "canonical writer dropped the wiki-link brackets; got:\n{out}"
1816        );
1817        assert!(
1818            !out.contains("- - "),
1819            "canonical writer emitted a nested block sequence (link corrupted); got:\n{out}"
1820        );
1821
1822        // And it round-trips: re-parsing the emitted YAML still surfaces exactly
1823        // one link with the right target (the edge graph/backlinks rely on).
1824        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
1825        let fields = reparsed.link_fields();
1826        let links: Vec<(&str, &str, Option<&str>)> = fields
1827            .iter()
1828            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
1829            .collect();
1830        assert_eq!(
1831            links,
1832            vec![("company", "records/companies/northstar", None)]
1833        );
1834
1835        // A second re-emit is a fixed point — no progressive corruption across
1836        // repeated curator-loop writes.
1837        assert_eq!(
1838            reparsed.to_yaml(),
1839            out,
1840            "to_yaml is not idempotent on links"
1841        );
1842    }
1843
1844    #[test]
1845    fn to_yaml_preserves_unquoted_scalar_link_with_display() {
1846        // The `|display` segment must survive the unquoted-inline round-trip too.
1847        let yaml = "type: contact\ncompany: [[records/companies/northstar|Northstar]]";
1848        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
1849        let out = fm.to_yaml();
1850        assert!(
1851            out.contains("[[records/companies/northstar|Northstar]]"),
1852            "display segment lost on round-trip; got:\n{out}"
1853        );
1854        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
1855        let f = reparsed.link_fields();
1856        assert_eq!(f.len(), 1);
1857        assert_eq!(f[0].1.target, "records/companies/northstar");
1858        assert_eq!(f[0].1.display.as_deref(), Some("Northstar"));
1859    }
1860
1861    #[test]
1862    fn to_yaml_does_not_mangle_link_list_or_plain_nested_sequence() {
1863        // A genuine quoted block list of links round-trips as a clean string
1864        // list — never collapsed to a scalar — and a plain nested sequence that
1865        // is NOT a wiki-link is left exactly as written (no false conversion).
1866        let yaml = "type: meeting\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nmatrix:\n  - - 1\n    - 2";
1867        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
1868        let out = fm.to_yaml();
1869
1870        // Both attendee links survive as quoted strings.
1871        assert!(out.contains("[[records/contacts/elena]]"), "got:\n{out}");
1872        assert!(out.contains("[[records/contacts/sarah]]"), "got:\n{out}");
1873
1874        let reparsed = Frontmatter::parse(&out, Path::new("m.md")).unwrap();
1875        let fields = reparsed.link_fields();
1876        let attendees: Vec<&str> = fields
1877            .iter()
1878            .filter(|(k, _)| k == "attendees")
1879            .map(|(_, l)| l.target.as_str())
1880            .collect();
1881        assert_eq!(
1882            attendees,
1883            vec!["records/contacts/elena", "records/contacts/sarah"]
1884        );
1885        // The non-link nested sequence is preserved verbatim, not touched.
1886        assert_eq!(reparsed.extra.get("matrix"), fm.extra.get("matrix"));
1887    }
1888
1889    // ── read_file / write_file round-trip ────────────────────────────────────
1890
1891    #[test]
1892    fn write_then_read_roundtrips_and_preserves_body_verbatim() {
1893        let dir = tempdir().unwrap();
1894        let path = dir.path().join("sources/emails/x.md");
1895        let body = "# Subject\n\nHello,\n\nSee [[records/contacts/sarah-chen]].\n";
1896        let mut fm = Frontmatter {
1897            type_: Some("email".into()),
1898            summary: Some("renewal note".into()),
1899            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
1900            ..Default::default()
1901        };
1902        fm.extra
1903            .insert("from".into(), Value::String("elena@northstar.io".into()));
1904
1905        write_file(&path, &fm, body).unwrap();
1906
1907        let (read_fm, read_body) = read_file(&path).unwrap();
1908        assert_eq!(read_body, body, "body must be preserved byte-for-byte");
1909        assert_eq!(read_fm.type_.as_deref(), Some("email"));
1910        assert_eq!(read_fm.summary.as_deref(), Some("renewal note"));
1911        assert_eq!(
1912            read_fm.extra.get("from").and_then(|v| v.as_str()),
1913            Some("elena@northstar.io")
1914        );
1915        // The on-disk file starts with a fence and ends with the verbatim body.
1916        let raw = std::fs::read_to_string(&path).unwrap();
1917        assert!(raw.starts_with("---\n"));
1918        assert!(raw.ends_with(body));
1919    }
1920
1921    #[test]
1922    fn roundtrip_modify_summary_then_write_changes_only_summary() {
1923        let dir = tempdir().unwrap();
1924        let path = dir.path().join("records/contacts/sarah.md");
1925        let body = "Long-form operator notes about Sarah.\n";
1926        let fm = Frontmatter {
1927            type_: Some("contact".into()),
1928            summary: Some("old summary".into()),
1929            ..Default::default()
1930        };
1931        write_file(&path, &fm, body).unwrap();
1932
1933        // Read → modify summary → write back.
1934        let (mut fm2, body2) = read_file(&path).unwrap();
1935        fm2.summary = Some("new summary".into());
1936        write_file(&path, &fm2, &body2).unwrap();
1937
1938        let (fm3, body3) = read_file(&path).unwrap();
1939        assert_eq!(fm3.summary.as_deref(), Some("new summary"));
1940        assert_eq!(fm3.type_.as_deref(), Some("contact"));
1941        assert_eq!(body3, body, "body unchanged across the round-trip");
1942    }
1943
1944    #[test]
1945    fn roundtrip_preserves_handwritten_unquoted_scalar_wiki_link_on_disk() {
1946        // End-to-end analog of `dbmd format` on the verbatim SPEC worked example:
1947        // a hand-written file carrying the canonical UNQUOTED scalar link
1948        // `company: [[records/companies/northstar]]`, read from disk then written
1949        // back unchanged. Before the fix this no-op re-emit rewrote the on-disk
1950        // value to the bracket-less block sequence `company:\n- - records/...`,
1951        // and every reader (validate/graph/backlinks) then lost the edge.
1952        let dir = tempdir().unwrap();
1953        let path = dir.path().join("records/contacts/sarah-chen.md");
1954        let file = "---\ntype: contact\nid: sarah-chen\nsummary: Director of Ops\ncompany: [[records/companies/northstar]]\n---\n# Sarah Chen\n\nNotes.\n";
1955        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
1956        std::fs::write(&path, file).unwrap();
1957
1958        // Read → write back unchanged (the canonical no-op re-emit).
1959        let (fm, body) = read_file(&path).unwrap();
1960        write_file(&path, &fm, &body).unwrap();
1961
1962        // On-disk bytes still carry the bracketed link, never `- - records/...`.
1963        let raw = std::fs::read_to_string(&path).unwrap();
1964        assert!(
1965            raw.contains("[[records/companies/northstar]]"),
1966            "on-disk wiki-link brackets were destroyed; got:\n{raw}"
1967        );
1968        assert!(
1969            !raw.contains("- - "),
1970            "on-disk value became a nested block sequence; got:\n{raw}"
1971        );
1972
1973        // And the edge is still readable after the round-trip.
1974        let (fm2, _) = read_file(&path).unwrap();
1975        let fields = fm2.link_fields();
1976        let links: Vec<(&str, &str)> = fields
1977            .iter()
1978            .map(|(k, l)| (k.as_str(), l.target.as_str()))
1979            .collect();
1980        assert_eq!(links, vec![("company", "records/companies/northstar")]);
1981    }
1982
1983    #[test]
1984    fn write_file_does_not_leave_temp_files_behind() {
1985        let dir = tempdir().unwrap();
1986        let path = dir.path().join("records/x.md");
1987        let fm = Frontmatter {
1988            type_: Some("note".into()),
1989            ..Default::default()
1990        };
1991        write_file(&path, &fm, "body\n").unwrap();
1992        // The directory should contain only the target file, no `.x.md.tmp.*`.
1993        let entries: Vec<String> = std::fs::read_dir(path.parent().unwrap())
1994            .unwrap()
1995            .map(|e| e.unwrap().file_name().to_string_lossy().into_owned())
1996            .collect();
1997        assert_eq!(entries, vec!["x.md".to_string()]);
1998    }
1999
2000    // ── is_content_file ──────────────────────────────────────────────────────
2001
2002    #[test]
2003    fn is_content_file_recognizes_layers_and_excludes_meta() {
2004        assert!(Frontmatter::is_content_file(Path::new(
2005            "sources/emails/2026-05-22.md"
2006        )));
2007        assert!(Frontmatter::is_content_file(Path::new(
2008            "records/contacts/sarah-chen.md"
2009        )));
2010        assert!(Frontmatter::is_content_file(Path::new(
2011            "wiki/people/sarah-chen.md"
2012        )));
2013        // Absolute paths under a layer are still content.
2014        assert!(Frontmatter::is_content_file(Path::new(
2015            "/home/db/records/companies/northstar.md"
2016        )));
2017        // index.md at any level is meta.
2018        assert!(!Frontmatter::is_content_file(Path::new(
2019            "records/contacts/index.md"
2020        )));
2021        assert!(!Frontmatter::is_content_file(Path::new("index.md")));
2022        // Root meta files.
2023        assert!(!Frontmatter::is_content_file(Path::new("DB.md")));
2024        assert!(!Frontmatter::is_content_file(Path::new("log.md")));
2025    }
2026
2027    // ── effective_id ─────────────────────────────────────────────────────────
2028
2029    #[test]
2030    fn effective_id_prefers_explicit_then_derives_from_path() {
2031        let with_id = Frontmatter {
2032            id: Some("explicit-id".into()),
2033            ..Default::default()
2034        };
2035        assert_eq!(
2036            with_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2037            "explicit-id"
2038        );
2039        let no_id = Frontmatter::default();
2040        assert_eq!(
2041            no_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2042            "sarah-chen"
2043        );
2044    }
2045
2046    // ── get / set ────────────────────────────────────────────────────────────
2047
2048    #[test]
2049    fn set_routes_universal_and_custom_keys() {
2050        let mut fm = Frontmatter::default();
2051        fm.set("type", "contact").unwrap();
2052        fm.set("summary", "hi").unwrap();
2053        fm.set("company", "[[records/companies/northstar]]")
2054            .unwrap();
2055        assert_eq!(fm.type_.as_deref(), Some("contact"));
2056        assert_eq!(fm.summary.as_deref(), Some("hi"));
2057        // Custom key landed in extra, not a typed slot.
2058        assert_eq!(
2059            fm.extra.get("company").and_then(|v| v.as_str()),
2060            Some("[[records/companies/northstar]]")
2061        );
2062        // get reads from both typed fields and extra.
2063        assert_eq!(
2064            fm.get("type").and_then(|v| v.as_str().map(String::from)),
2065            Some("contact".into())
2066        );
2067        assert_eq!(
2068            fm.get("company").and_then(|v| v.as_str().map(String::from)),
2069            Some("[[records/companies/northstar]]".into())
2070        );
2071        assert!(fm.get("nonexistent").is_none());
2072    }
2073
2074    #[test]
2075    fn set_timestamp_validates_rfc3339() {
2076        let mut fm = Frontmatter::default();
2077        fm.set("created", "2026-05-27T08:00:00-07:00").unwrap();
2078        assert!(fm.created.is_some());
2079        let err = fm.set("updated", "not-a-date").unwrap_err();
2080        assert!(matches!(err, ParseError::BadTimestamp { .. }));
2081    }
2082
2083    // ── extract_wiki_links ───────────────────────────────────────────────────
2084
2085    #[test]
2086    fn extract_wiki_links_flags_full_path_short_form_and_extension() {
2087        let body = "See [[records/contacts/sarah-chen]] and [[sarah-chen]].\nAlso [[wiki/people/sarah-chen.md|Sarah]].\n";
2088        let links = extract_wiki_links(body, Path::new("doc.md"));
2089        assert_eq!(links.len(), 3);
2090
2091        // Full path, no extension, no display.
2092        assert_eq!(links[0].target, "records/contacts/sarah-chen");
2093        assert!(links[0].is_full_path);
2094        assert!(!links[0].has_md_extension);
2095        assert_eq!(links[0].display, None);
2096        assert_eq!(links[0].location.1, 1, "first link on line 1");
2097
2098        // Short form: not a full path.
2099        assert_eq!(links[1].target, "sarah-chen");
2100        assert!(!links[1].is_full_path, "bare target is short-form");
2101
2102        // Full path WITH .md extension and a display override on line 2.
2103        assert_eq!(links[2].target, "wiki/people/sarah-chen.md");
2104        assert!(links[2].is_full_path);
2105        assert!(links[2].has_md_extension);
2106        assert_eq!(links[2].display.as_deref(), Some("Sarah"));
2107        assert_eq!(links[2].location.1, 2);
2108    }
2109
2110    #[test]
2111    fn extract_wiki_links_reports_1_based_column_counting_chars() {
2112        // A multi-byte prefix (é is 2 bytes) must not skew the char column.
2113        let body = "café [[records/x/y]]";
2114        let links = extract_wiki_links(body, Path::new("d.md"));
2115        assert_eq!(links.len(), 1);
2116        // "café " is 5 chars, so the `[[` starts at char column 6 (1-based).
2117        assert_eq!(links[0].location.2, 6);
2118    }
2119
/// A layer-rooted path written as plain prose, with no `[[ ]]` around it,
/// produces no wiki-links.
#[test]
fn extract_wiki_links_ignores_a_lone_path_without_brackets() {
    let prose = "records/contacts/sarah-chen is not a link";
    let found = extract_wiki_links(prose, Path::new("d.md"));
    assert!(found.is_empty());
}
2128
2129    // ── extract_markdown_links ───────────────────────────────────────────────
2130
/// The two extractors are disjoint: a `[text](url)` link is seen only by the
/// markdown extractor and a `[[target]]` link only by the wiki extractor.
#[test]
fn extract_markdown_links_captures_external_and_not_wiki_links() {
    let doc = Path::new("d.md");
    let body =
        "See [the thread](https://x.com/a) and [[records/contacts/sarah-chen]] internally.\n";

    // Markdown side: exactly the external link, located on line 1.
    let md_links = extract_markdown_links(body, doc);
    assert_eq!(
        md_links.len(),
        1,
        "wiki-link must not be captured as a markdown link"
    );
    let external = &md_links[0];
    assert_eq!(external.text, "the thread");
    assert_eq!(external.url, "https://x.com/a");
    assert_eq!(external.location.1, 1);

    // Wiki side: exactly the internal link; the markdown link is not picked up.
    let wiki_links = extract_wiki_links(body, doc);
    assert_eq!(wiki_links.len(), 1);
    assert_eq!(wiki_links[0].target, "records/contacts/sarah-chen");
}
2150
2151    // ── link_fields ──────────────────────────────────────────────────────────
2152
/// `link_fields()` reports links from a quoted scalar field, from each item
/// of a quoted block list, and from inside `summary` prose, while a field
/// holding only plain text contributes nothing.
#[test]
fn link_fields_extracts_scalar_list_and_summary_links() {
    // List items are quoted so YAML reads them as clean strings; the scalar
    // `company` is quoted here too (the unquoted inline form `company: [[x]]`
    // from SPEC § Linking is also canonical for scalars).
    let yaml = "type: meeting\nsummary: with [[records/contacts/elena]]\ncompany: \"[[records/companies/northstar]]\"\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nnotes: just plain text";
    let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();

    // Precondition: `company` really parsed as a scalar string.
    let company_value = fm.extra.get("company");
    assert!(company_value.and_then(|v| v.as_str()).is_some());

    // Bucket every reported link by the field it came from, in one pass.
    let fields = fm.link_fields();
    let mut company: Vec<&str> = Vec::new();
    let mut attendees: Vec<&str> = Vec::new();
    let mut summary_hits: usize = 0;
    let mut notes_hits: usize = 0;
    for (key, link) in fields.iter() {
        match key.as_str() {
            "company" => company.push(link.target.as_str()),
            "attendees" => attendees.push(link.target.as_str()),
            "summary" => summary_hits += 1,
            "notes" => notes_hits += 1,
            _ => {}
        }
    }

    // Scalar field: one link with the expected target.
    assert_eq!(company, vec!["records/companies/northstar"]);
    // Block list: one link per item, in list order.
    assert_eq!(
        attendees,
        vec!["records/contacts/elena", "records/contacts/sarah"]
    );
    // The link embedded in the summary text is surfaced once.
    assert_eq!(summary_hits, 1);
    // Plain text yields no link.
    assert_eq!(notes_hits, 0);
}
2186
/// Regression guard for the canonical *unquoted* scalar form
/// `company: [[records/companies/northstar]]` (SPEC § Linking). YAML reads
/// `[[x]]` as a list nested in a list, so a walk that only looks at string
/// values would lose it; `link_fields()` must still report exactly one link.
#[test]
fn link_fields_surfaces_canonical_unquoted_scalar_link() {
    let doc = Path::new("m.md");

    let plain = Frontmatter::parse(
        "type: meeting\ncompany: [[records/companies/northstar]]",
        doc,
    )
    .unwrap();
    // Precondition: the value is the nested-sequence form, not a string.
    let raw_company = plain.extra.get("company");
    assert!(raw_company.and_then(|v| v.as_str()).is_none());

    // Exactly one link, keyed by `company`, with no display text.
    let fields = plain.link_fields();
    assert_eq!(fields.len(), 1);
    let (key, link) = &fields[0];
    assert_eq!(key.as_str(), "company");
    assert_eq!(link.target.as_str(), "records/companies/northstar");
    assert_eq!(link.display.as_deref(), None);

    // A `|display` segment is carried through the unquoted form as well.
    let labelled = Frontmatter::parse(
        "type: meeting\ncompany: [[records/companies/northstar|Northstar]]",
        doc,
    )
    .unwrap();
    let labelled_fields = labelled.link_fields();
    let summary: Vec<(&str, &str, Option<&str>)> = labelled_fields
        .iter()
        .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
        .collect();
    assert_eq!(
        summary,
        vec![("company", "records/companies/northstar", Some("Northstar"))]
    );
}
2221
/// `aliases: [foo]` is an ordinary one-item flow list — one nesting level
/// shallower than an unquoted `[[foo]]` — so it must not be reported as a
/// wiki-link.
#[test]
fn link_fields_ignores_plain_one_item_flow_list() {
    let fm = Frontmatter::parse("type: contact\naliases: [foo]", Path::new("c.md")).unwrap();
    let reported = fm.link_fields();
    assert!(reported.is_empty());
}
2231
2232    // ── detect_flow_form_link_lists ──────────────────────────────────────────
2233
/// Only list-shaped mis-encodings of wiki-links are flagged: the flow-form
/// list and the unquoted block list. A scalar inline link, the quoted block
/// list, and a plain list of strings all pass.
#[test]
fn detect_flow_form_flags_list_misencodings_not_scalars() {
    let only_attendees = vec!["attendees".to_string()];

    // Flow-form list (triple-nested) is flagged; the scalar inline link
    // (double-nested) beside it is not.
    let flow_and_scalar = "attendees: [[[records/x]], [[records/y]]]\nscalar_inline: [[records/z]]";
    assert_eq!(detect_flow_form_link_lists(flow_and_scalar), only_attendees);

    // A block list with UNquoted items also parses triple-nested: flagged.
    let unquoted_block = "attendees:\n  - [[records/x]]\n  - [[records/y]]";
    assert_eq!(detect_flow_form_link_lists(unquoted_block), only_attendees);

    // The canonical QUOTED block list parses to clean strings: not flagged.
    let quoted_block = "attendees:\n  - \"[[records/x]]\"\n  - \"[[records/y]]\"";
    assert!(detect_flow_form_link_lists(quoted_block).is_empty());

    // A plain list of strings: not flagged.
    let plain_list = "tags: [a, b, c]";
    assert!(detect_flow_form_link_lists(plain_list).is_empty());
}
2257
2258    // ── extract_sections ─────────────────────────────────────────────────────
2259
/// Sections are reported in source order with their heading level; a
/// section's body runs up to the next heading of equal or shallower depth,
/// and line numbers are 1-based within the body.
#[test]
fn extract_sections_levels_nesting_and_boundaries() {
    let body = "intro text\n## First\nalpha\n### Sub\nbeta\n## Second\ngamma\n";
    let sections = extract_sections(body);

    let outline: Vec<(&str, u8)> = sections
        .iter()
        .map(|sec| (sec.heading.as_str(), sec.level))
        .collect();
    assert_eq!(outline, [("First", 2), ("Sub", 3), ("Second", 2)]);

    // The H2 "First" swallows its H3 child and ends where "Second" begins.
    let first = &sections[0];
    for needle in ["alpha", "### Sub", "beta"] {
        assert!(first.body.contains(needle));
    }
    assert!(!first.body.contains("Second"));

    // The H3 "Sub" ends at the next equal-or-shallower heading ("Second").
    let sub = &sections[1];
    assert!(sub.body.contains("beta"));
    assert!(!sub.body.contains("gamma"));

    // Heading lines, counted from 1 inside the body.
    let second = &sections[2];
    assert_eq!(first.line, 2);
    assert_eq!(second.line, 6);
}
2284
/// A `##` line inside a fenced code block is body text of the enclosing
/// section, not a section of its own.
#[test]
fn extract_sections_ignores_headings_in_fenced_code() {
    let sections = extract_sections("## Real\n```\n## Fake heading in code\n```\nafter\n");
    assert_eq!(sections.len(), 1);

    let real = &sections[0];
    assert_eq!(real.heading, "Real");
    // The fenced pseudo-heading stays inside Real's body.
    assert!(real.body.contains("## Fake heading in code"));
}
2294
2295    // ── parse_field_spec ─────────────────────────────────────────────────────
2296
/// `(required, email)` sets the required flag and the `Email` shape, leaving
/// no unknown modifiers behind.
#[test]
fn parse_field_spec_required_and_shape() {
    let spec = parse_field_spec("- email (required, email)");
    assert_eq!(spec.name, "email");
    assert!(spec.required);
    assert_eq!(spec.shape, Some(Shape::Email));
    assert!(spec.unknown_modifiers.is_empty());
}
2305
/// `link to records/companies/` is stored as the prefix without its trailing
/// slash, and a link modifier does not set a shape.
#[test]
fn parse_field_spec_link_prefix_strips_trailing_slash() {
    let spec = parse_field_spec("- company (required, link to records/companies/)");
    let expected_prefix = PathBuf::from("records/companies");
    assert!(spec.required);
    assert_eq!(spec.link_prefix, Some(expected_prefix));
    assert_eq!(spec.shape, None);
}
2313
/// `default USD` is captured as the string value `USD`, case intact.
#[test]
fn parse_field_spec_default_preserves_case_and_value() {
    let spec = parse_field_spec("- currency (default USD)");
    let expected_default = Value::String("USD".into());
    assert_eq!(spec.name, "currency");
    assert_eq!(spec.default, Some(expected_default));
}
2320
/// An `enum:` modifier in last position takes the rest of the comma list as
/// its values; the `required` modifier before it is still honoured.
#[test]
fn parse_field_spec_enum_captures_comma_list_as_last_modifier() {
    let spec = parse_field_spec("- status (required, enum: open, closed, pending)");
    let expected: Vec<String> = ["open", "closed", "pending"]
        .iter()
        .map(|v| v.to_string())
        .collect();
    assert!(spec.required);
    assert_eq!(spec.enum_values, Some(expected));
}
2334
/// An unrecognised modifier is recorded in `unknown_modifiers` and the known
/// modifiers on either side of it still take effect.
#[test]
fn parse_field_spec_unknown_modifier_is_captured_not_errored() {
    let spec = parse_field_spec("- weird (required, frobnicate, string)");
    let expected_unknown = vec!["frobnicate".to_string()];
    assert!(spec.required);
    assert_eq!(spec.shape, Some(Shape::String));
    assert_eq!(spec.unknown_modifiers, expected_unknown);
}
2342
/// A bullet with no parenthesised modifiers yields just a name: not
/// required, no shape, no link prefix, no enum, nothing unknown.
#[test]
fn parse_field_spec_no_parens_is_freeform_optional() {
    let spec = parse_field_spec("- nickname");
    assert_eq!(spec.name, "nickname");

    // Every optional facet is left unset.
    assert!(!spec.required);
    assert_eq!(spec.shape, None);
    assert!(spec.link_prefix.is_none());
    assert!(spec.enum_values.is_none());
    assert!(spec.unknown_modifiers.is_empty());
}
2353
2354    // ── parse_schema_bullet (directives) ─────────────────────────────────────
2355
/// `- unique: email` is a `Unique` directive naming one field.
#[test]
fn schema_bullet_unique_single_field() {
    let key_fields = match parse_schema_bullet("- unique: email") {
        SchemaBullet::Unique(fields) => fields,
        other => panic!("expected Unique, got {other:?}"),
    };
    assert_eq!(key_fields, vec!["email".to_string()]);
}
2363
/// A compound `unique:` key is split on commas and each name is trimmed of
/// surrounding whitespace.
#[test]
fn schema_bullet_unique_compound_trims_and_splits() {
    let key_fields = match parse_schema_bullet("- unique: date, amount , vendor") {
        SchemaBullet::Unique(fields) => fields,
        other => panic!("expected Unique, got {other:?}"),
    };
    let expected: Vec<String> = ["date", "amount", "vendor"]
        .iter()
        .map(|name| name.to_string())
        .collect();
    assert_eq!(key_fields, expected);
}
2378
/// Everything after `summary_template:` is kept verbatim, including braces,
/// parentheses, and further colons.
#[test]
fn schema_bullet_summary_template_keeps_braces_and_inner_colons() {
    let template = match parse_schema_bullet("- summary_template: {role} at {company} (x: y)") {
        SchemaBullet::SummaryTemplate(t) => t,
        other => panic!("expected SummaryTemplate, got {other:?}"),
    };
    assert_eq!(template, "{role} at {company} (x: y)");
}
2386
/// A colon that first appears inside the parenthesised modifiers (`enum:`)
/// does not turn the bullet into a directive: the `(` comes before any `:`,
/// so it parses as a field.
#[test]
fn schema_bullet_field_with_enum_modifier_is_not_a_directive() {
    let field = match parse_schema_bullet("- status (enum: open, closed)") {
        SchemaBullet::Field(f) => f,
        other => panic!("expected Field, got {other:?}"),
    };
    let expected_values = vec!["open".to_string(), "closed".to_string()];
    assert_eq!(field.name, "status");
    assert_eq!(field.enum_values, Some(expected_values));
}
2402
/// Inside a `### <type>` schema block, `unique:` and `summary_template:`
/// bullets land on the schema itself and are not counted among its fields.
#[test]
fn parse_db_md_schema_captures_unique_and_summary_template() {
    let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### contact\n- email (required, email)\n- unique: email\n- summary_template: {role} at {company}\n";
    let config = parse_db_md(db, Path::new("DB.md")).unwrap();
    let contact = config.schemas.get("contact").expect("contact schema");

    assert_eq!(contact.fields.len(), 1, "directives are not parsed as fields");
    let expected_keys = vec![vec!["email".to_string()]];
    assert_eq!(contact.unique_keys, expected_keys);
    assert_eq!(
        contact.summary_template.as_deref(),
        Some("{role} at {company}")
    );
}
2412
2413    // ── parse_db_md ──────────────────────────────────────────────────────────
2414
/// A complete `DB.md` fixture: frontmatter, title and blurb, agent
/// instructions, two policy subsections, and two schemas. Written one
/// document line per `concat!` piece so the fixture reads like the file.
const CANONICAL_DB_MD: &str = concat!(
    "---\n",
    "type: db-md\n",
    "scope: company\n",
    "owner: Sarah Chen\n",
    "---\n",
    "\n",
    "# Acme operations knowledge base\n",
    "\n",
    "Company-scale institutional memory for Acme.\n",
    "\n",
    "## Agent instructions\n",
    "\n",
    "Prioritize creating `contact` records from new-sender emails. Use British English.\n",
    "\n",
    "## Policies\n",
    "\n",
    "### Frozen pages\n",
    "- `records/decisions/2026-q1-strategy.md` — finalized, do not modify.\n",
    "- `wiki/synthesis/2026-annual-plan.md` — signed-off plan.\n",
    "\n",
    "### Ignored types\n",
    "- `test`, `temp` — read but never synthesize.\n",
    "\n",
    "## Schemas\n",
    "\n",
    "### contact\n",
    "- name (required)\n",
    "- email (required, email)\n",
    "- company (required, link to records/companies/)\n",
    "- role (string)\n",
    "\n",
    "### expense\n",
    "- date (required, date)\n",
    "- amount (required)\n",
    "- currency (default USD)\n",
);
2416
/// Parsing the canonical fixture fills every structured section: agent
/// instructions, frozen pages, ignored types, and both schemas.
#[test]
fn parse_db_md_extracts_all_canonical_sections() {
    let config = parse_db_md(CANONICAL_DB_MD, Path::new("DB.md")).unwrap();

    // Agent instructions are the section's prose with the heading line gone.
    let instructions = config
        .agent_instructions
        .as_deref()
        .expect("agent instructions present");
    assert!(instructions.starts_with("Prioritize creating"));
    assert!(!instructions.contains("## Agent instructions"));

    // Frozen pages: the backticked path of each bullet, trailing comment dropped.
    let expected_frozen: Vec<PathBuf> = [
        "records/decisions/2026-q1-strategy.md",
        "wiki/synthesis/2026-annual-plan.md",
    ]
    .iter()
    .map(|p| PathBuf::from(p))
    .collect();
    assert_eq!(config.frozen_pages, expected_frozen);

    // Ignored types: the comma list with backticks and comment stripped.
    let expected_ignored = vec!["test".to_string(), "temp".to_string()];
    assert_eq!(config.ignored_types, expected_ignored);

    // Schemas: two types, fields kept in source order.
    assert_eq!(config.schemas.len(), 2);

    let contact = config.schemas.get("contact").expect("contact schema");
    let contact_fields = &contact.fields;
    let field_names: Vec<&str> = contact_fields.iter().map(|f| f.name.as_str()).collect();
    assert_eq!(field_names, ["name", "email", "company", "role"]);
    assert!(contact_fields[0].required); // name
    assert_eq!(contact_fields[1].shape, Some(Shape::Email)); // email
    assert_eq!(
        contact_fields[2].link_prefix,
        Some(PathBuf::from("records/companies"))
    ); // company

    let expense = config.schemas.get("expense").expect("expense schema");
    let currency = expense
        .fields
        .iter()
        .find(|field| field.name == "currency")
        .unwrap();
    assert_eq!(currency.default, Some(Value::String("USD".into())));
}
2463
/// A messy `## Schemas` section still parses: a bullet that precedes any
/// `### <type>` heading creates no schema, an unknown modifier is recorded
/// rather than rejected, and a bullet with an unclosed paren keeps its name.
#[test]
fn parse_db_md_handles_malformed_and_unknown_modifiers() {
    // corpus-b shape: orphan bullet, unknown modifier, malformed bullet.
    let text = "---\ntype: db-md\n---\n\n## Schemas\n- orphan (required)\n\n### ticket\n- priority (required, mystery, enum: low, high)\n- broken (\n";
    let config = parse_db_md(text, Path::new("DB.md")).unwrap();

    // Only `ticket` exists; the orphan bullet belongs to no schema.
    assert_eq!(config.schemas.len(), 1);
    let ticket = config.schemas.get("ticket").expect("ticket schema");
    assert_eq!(ticket.fields.len(), 2);
    let (priority, broken) = (&ticket.fields[0], &ticket.fields[1]);

    // Known modifiers apply; the unknown one is kept to the side.
    let expected_levels = vec!["low".to_string(), "high".to_string()];
    assert!(priority.required);
    assert_eq!(priority.unknown_modifiers, vec!["mystery".to_string()]);
    assert_eq!(priority.enum_values, Some(expected_levels));

    // The unclosed paren does not cost the bullet its name.
    assert_eq!(broken.name, "broken");
}
2490
/// A `DB.md` without a leading frontmatter block is rejected with
/// `ParseError::MissingFrontmatter`.
#[test]
fn parse_db_md_missing_frontmatter_errors() {
    let outcome = parse_db_md(
        "# No frontmatter\n\n## Agent instructions\nhi\n",
        Path::new("DB.md"),
    );
    assert!(matches!(
        outcome,
        Err(ParseError::MissingFrontmatter { .. })
    ));
}
2497
/// With frontmatter and a title but none of the structured sections, the
/// parsed config equals `Config::default()`.
#[test]
fn parse_db_md_absent_sections_default_empty() {
    let parsed = parse_db_md(
        "---\ntype: db-md\n---\n\n# Title only\n",
        Path::new("DB.md"),
    )
    .unwrap();
    assert_eq!(parsed, Config::default());
}
2504
2505    // ── fm set / --fm list-valued link fields (meeting.attendees & friends) ──
2506
/// `Frontmatter::set` is the value path behind every write surface (`fm set`,
/// `write --fm`). When the value is a list of wiki-links (the SPEC's
/// `meeting.attendees` shape) it must be stored as a sequence of clean link
/// strings and rendered as a YAML **block sequence** — readable back through
/// [`links_in_field_value`] and acceptable to `dbmd validate` — not as the
/// flow-form scalar string that trips `WIKI_LINK_FLOW_FORM_LIST`. The
/// unquoted (`[[[a]], [[b]]]`) and quoted (`["[[a]]", "[[b]]"]`) spellings
/// must both normalize to that form.
#[test]
fn set_list_of_wiki_links_becomes_block_sequence_both_spellings() {
    let spellings = [
        "[[[records/contacts/a]], [[records/contacts/b]]]",
        r#"["[[records/contacts/a]]", "[[records/contacts/b]]"]"#,
    ];

    for value in spellings {
        let mut fm = Frontmatter::default();
        fm.set("attendees", value).unwrap();

        // Storage: a two-element sequence of clean link strings.
        let stored = fm.extra.get("attendees").expect("attendees set");
        let items = match stored {
            Value::Sequence(items) => items,
            _ => panic!("attendees must be a Sequence, got {stored:?} for input {value}"),
        };
        assert_eq!(items.len(), 2, "input {value}");
        assert_eq!(items[0], Value::String("[[records/contacts/a]]".into()));
        assert_eq!(items[1], Value::String("[[records/contacts/b]]".into()));

        // Read-back: the edge enumerator sees exactly the two targets, with
        // no stray bracket targets (the flow-form-string symptom).
        let mut targets = Vec::new();
        for link in links_in_field_value(stored) {
            targets.push(link.target);
        }
        assert_eq!(
            targets,
            vec!["records/contacts/a", "records/contacts/b"],
            "input {value}"
        );

        // Rendering: the canonical writer emits a block list, not a scalar.
        let rendered = fm.to_yaml();
        assert!(
            rendered.contains("attendees:\n"),
            "expected block list in:\n{rendered}"
        );
        assert!(
            !rendered.contains("attendees: '[["),
            "must not be a flow-form scalar string in:\n{rendered}"
        );
    }
}
2556
/// Setting a *single* inline wiki-link stores it as a scalar string, never
/// widened to a one-item list — the common `contact.company` /
/// `expense.vendor` shape — and it still reads back as one link.
#[test]
fn set_single_inline_wiki_link_stays_scalar() {
    let mut fm = Frontmatter::default();
    fm.set("company", "[[records/companies/tideform]]").unwrap();

    let expected = Value::String("[[records/companies/tideform]]".into());
    assert_eq!(fm.extra.get("company"), Some(&expected));

    // The scalar is still recognised as exactly one link.
    let stored = fm.extra.get("company").unwrap();
    let mut targets = Vec::new();
    for link in links_in_field_value(stored) {
        targets.push(link.target);
    }
    assert_eq!(targets, vec!["records/companies/tideform"]);
}
2575
/// Values that are not lists of wiki-links are stored verbatim as scalar
/// strings: list normalization applies only when every item is a clean
/// wiki-link.
#[test]
fn set_non_link_values_stay_scalar_strings() {
    let mut fm = Frontmatter::default();

    // Ordinary prose is kept exactly as given.
    fm.set("location", "Video call (remote)").unwrap();
    let expected_location = Value::String("Video call (remote)".into());
    assert_eq!(fm.extra.get("location"), Some(&expected_location));

    // A flow list of non-links is not reinterpreted as a link sequence; the
    // string the agent passed is what gets stored.
    fm.set("note", "[draft, wip]").unwrap();
    let expected_note = Value::String("[draft, wip]".into());
    assert_eq!(fm.extra.get("note"), Some(&expected_note));
}
2595}