Skip to main content

dbmd_core/
parser.rs

1//! `parser` — read and write db.md markdown files.
2//!
3//! Parses the YAML frontmatter block, the markdown body, wiki-links, standard
4//! markdown links, `##` sections, and the structured sections of the `DB.md`
5//! config file. Also the atomic writer that round-trips a file while
6//! preserving the operator-edited body verbatim and emitting frontmatter in
7//! canonical key order.
8//!
9//! Strict on required fields, lenient on unknowns: any frontmatter key the
10//! spec doesn't recognize is preserved in [`Frontmatter::extra`] as ambient
11//! context and round-tripped untouched.
12
13use std::collections::BTreeMap;
14use std::path::{Path, PathBuf};
15
16use chrono::{DateTime, FixedOffset};
17use serde_norway::{Mapping, Value};
18
/// Names of the three canonical layer folders of a store.
///
/// A path counts as "content", and a wiki-link target counts as "full-path",
/// only when it resolves under one of these folders.
const LAYER_DIRS: [&str; 3] = ["sources", "records", "wiki"];
22
23/// Errors produced while parsing a markdown file or the `DB.md` config.
24#[derive(Debug, thiserror::Error)]
25pub enum ParseError {
26    /// The frontmatter block was not valid YAML. Maps to validate code
27    /// `FM_MALFORMED_YAML`.
28    #[error("malformed YAML frontmatter in {file}: {source}")]
29    MalformedYaml {
30        /// The file whose frontmatter failed to parse.
31        file: PathBuf,
32        /// The underlying YAML error.
33        source: serde_norway::Error,
34    },
35
36    /// The file has no `---`-delimited frontmatter block at its very start.
37    #[error("missing frontmatter block in {file}")]
38    MissingFrontmatter {
39        /// The offending file.
40        file: PathBuf,
41    },
42
43    /// A required field was absent. Maps to validate code `FM_MISSING_TYPE`
44    /// (for `type`) and the per-type required-field codes.
45    #[error("missing required field '{key}' in {file}")]
46    MissingField {
47        /// The file missing the field.
48        file: PathBuf,
49        /// The required key.
50        key: String,
51    },
52
53    /// A timestamp field was not ISO-8601 / RFC3339. Maps to `FM_BAD_TIMESTAMP`.
54    #[error("bad timestamp in field '{key}' of {file}: {value}")]
55    BadTimestamp {
56        /// The file.
57        file: PathBuf,
58        /// The frontmatter key.
59        key: String,
60        /// The unparseable value.
61        value: String,
62    },
63
64    /// An I/O error reading the file.
65    #[error(transparent)]
66    Io(#[from] std::io::Error),
67}
68
69/// The parsed YAML frontmatter of a db.md file.
70///
71/// The universal-contract fields are typed accessors; everything else lands in
72/// [`extra`](Frontmatter::extra) as ambient context (unknown-field passthrough)
73/// and is round-tripped verbatim. The atomic writer re-emits keys in canonical
74/// order: `type`, `id`, `created`, `updated`, `summary` first, then
75/// type-specific fields, then `status` / `tags`.
76#[derive(Debug, Clone, Default, PartialEq)]
77pub struct Frontmatter {
78    /// `type` — required on content files; the primary query key.
79    pub type_: Option<String>,
80    /// `id` — optional; derived from the file path when absent.
81    pub id: Option<String>,
82    /// `created` — RFC3339; required and auto-set on content-file create.
83    pub created: Option<DateTime<FixedOffset>>,
84    /// `updated` — RFC3339; required and auto-maintained on content files.
85    pub updated: Option<DateTime<FixedOffset>>,
86    /// `summary` — the one-line catalog line; required on every content file.
87    pub summary: Option<String>,
88    /// `status` — optional lifecycle state.
89    pub status: Option<String>,
90    /// `tags` — optional flat list of short scalar labels.
91    pub tags: Vec<String>,
92    /// All other frontmatter keys (type-specific + custom), preserved verbatim
93    /// in insertion-stable sorted order. Wiki-link-valued fields keep their raw
94    /// YAML form here; [`Frontmatter::link_fields`] surfaces them as
95    /// [`WikiLink`]s.
96    pub extra: BTreeMap<String, Value>,
97}
98
99impl Frontmatter {
100    /// Parse a YAML frontmatter block (the text between the opening and closing
101    /// `---` fences, exclusive) into a [`Frontmatter`].
102    ///
103    /// Lenient on unknown keys (they go to [`extra`](Frontmatter::extra));
104    /// returns [`ParseError::MalformedYaml`] only on YAML that doesn't parse.
105    pub fn parse(yaml: &str, file: &Path) -> Result<Self, ParseError> {
106        // An empty (or whitespace-only) frontmatter block is a valid, empty
107        // mapping — not a YAML error.
108        let value: Value = if yaml.trim().is_empty() {
109            Value::Mapping(Mapping::new())
110        } else {
111            serde_norway::from_str(yaml).map_err(|source| ParseError::MalformedYaml {
112                file: file.to_path_buf(),
113                source,
114            })?
115        };
116
117        // Top-level frontmatter must be a mapping. A scalar or sequence at the
118        // top level is malformed for our purposes; surface it as such.
119        let map = match value {
120            Value::Mapping(m) => m,
121            Value::Null => Mapping::new(),
122            other => {
123                // serde_norway::Error has no public constructor, so let the
124                // deserializer decide: a value that coerces to a Mapping (e.g. a
125                // YAML-tagged mapping `!tag\n k: v`, where the tag is ambient) is
126                // accepted as that mapping; a genuine scalar or sequence top
127                // level fails to coerce and IS the malformed case. (Using a
128                // match here, not `expect_err`, avoids a panic on the
129                // tagged-mapping case, which deserializes to a Mapping just
130                // fine.)
131                match serde_norway::from_value::<Mapping>(other) {
132                    Ok(m) => m,
133                    Err(source) => {
134                        return Err(ParseError::MalformedYaml {
135                            file: file.to_path_buf(),
136                            source,
137                        });
138                    }
139                }
140            }
141        };
142
143        let mut fm = Frontmatter::default();
144        for (k, v) in map {
145            let key = match k.as_str() {
146                Some(s) => s.to_string(),
147                // Non-string keys are unusual; stringify defensively and keep
148                // them in `extra` so nothing is silently dropped.
149                None => format!("{k:?}"),
150            };
151            match key.as_str() {
152                "type" => fm.type_ = v.as_str().map(str::to_string),
153                "id" => fm.id = v.as_str().map(str::to_string),
154                "created" => fm.created = parse_timestamp(&v, "created", file)?,
155                "updated" => fm.updated = parse_timestamp(&v, "updated", file)?,
156                "summary" => fm.summary = v.as_str().map(str::to_string),
157                "status" => fm.status = v.as_str().map(str::to_string),
158                "tags" => fm.tags = parse_tags(&v),
159                _ => {
160                    fm.extra.insert(key, v);
161                }
162            }
163        }
164        Ok(fm)
165    }
166
167    /// Serialize the frontmatter back to a YAML block (no `---` fences) in
168    /// canonical key order. Round-trips [`extra`](Frontmatter::extra) verbatim.
169    pub fn to_yaml(&self) -> String {
170        // Build an order-preserving mapping in canonical key order:
171        //   type, id, created, updated, summary  (universal head)
172        //   <type-specific extra, BTreeMap-sorted>
173        //   status, tags                          (universal tail)
174        // serde_norway::Mapping preserves insertion order, so one serialize call
175        // emits the block in exactly this order with correct YAML quoting.
176        let mut map = Mapping::new();
177
178        if let Some(t) = &self.type_ {
179            map.insert(Value::String("type".into()), Value::String(t.clone()));
180        }
181        if let Some(id) = &self.id {
182            map.insert(Value::String("id".into()), Value::String(id.clone()));
183        }
184        if let Some(created) = &self.created {
185            map.insert(
186                Value::String("created".into()),
187                Value::String(created.to_rfc3339()),
188            );
189        }
190        if let Some(updated) = &self.updated {
191            map.insert(
192                Value::String("updated".into()),
193                Value::String(updated.to_rfc3339()),
194            );
195        }
196        if let Some(summary) = &self.summary {
197            map.insert(
198                Value::String("summary".into()),
199                Value::String(summary.clone()),
200            );
201        }
202
203        // Type-specific + custom fields, in BTreeMap (sorted) order. Each value
204        // is canonicalized so a wiki-link round-trips to the form the writer and
205        // `dbmd validate` agree on — critically, the SPEC-canonical *unquoted*
206        // scalar `field: [[x]]` (which YAML parses to a nested `Seq[Seq[String]]`)
207        // is re-emitted as a quoted scalar `'[[x]]'` instead of the bracket-less
208        // block sequence `- - x` that a verbatim re-emit would produce and that
209        // destroys the link. See [`canonicalize_extra_value`].
210        for (k, v) in &self.extra {
211            map.insert(Value::String(k.clone()), canonicalize_extra_value(v));
212        }
213
214        if let Some(status) = &self.status {
215            map.insert(
216                Value::String("status".into()),
217                Value::String(status.clone()),
218            );
219        }
220        if !self.tags.is_empty() {
221            map.insert(
222                Value::String("tags".into()),
223                Value::Sequence(self.tags.iter().cloned().map(Value::String).collect()),
224            );
225        }
226
227        if map.is_empty() {
228            return String::new();
229        }
230        serde_norway::to_string(&Value::Mapping(map)).unwrap_or_default()
231    }
232
233    /// True if the file is content (under `sources/`, `records/`, or `wiki/`)
234    /// and not an `index.md`. Used by validate to decide which files require a
235    /// `summary`. Meta files (`DB.md`, `index.md`, `log.md`) return false.
236    pub fn is_content_file(path: &Path) -> bool {
237        // index.md is a meta file at every level, never content.
238        if path.file_name().and_then(|n| n.to_str()) == Some("index.md") {
239            return false;
240        }
241        // Content iff some path component is one of the three layer dirs. This
242        // works for both store-relative (`sources/emails/x.md`) and absolute
243        // (`/home/db/sources/emails/x.md`) paths. DB.md / log.md sit at the
244        // root, under no layer, so they fall through to false.
245        path.components().any(|c| {
246            c.as_os_str()
247                .to_str()
248                .is_some_and(|s| LAYER_DIRS.contains(&s))
249        })
250    }
251
252    /// Resolve the file's effective `id`: the explicit `id` field if present,
253    /// otherwise derived from the store-relative path (filename without `.md`).
254    pub fn effective_id(&self, store_relative_path: &Path) -> String {
255        if let Some(id) = &self.id {
256            if !id.is_empty() {
257                return id.clone();
258            }
259        }
260        // Derived id = filename without the `.md` extension.
261        store_relative_path
262            .file_stem()
263            .and_then(|s| s.to_str())
264            .unwrap_or_default()
265            .to_string()
266    }
267
268    /// Read a single frontmatter key as a raw YAML [`Value`], looking in the
269    /// typed fields first and then [`extra`](Frontmatter::extra).
270    pub fn get(&self, key: &str) -> Option<Value> {
271        match key {
272            "type" => self.type_.clone().map(Value::String),
273            "id" => self.id.clone().map(Value::String),
274            "created" => self.created.map(|d| Value::String(d.to_rfc3339())),
275            "updated" => self.updated.map(|d| Value::String(d.to_rfc3339())),
276            "summary" => self.summary.clone().map(Value::String),
277            "status" => self.status.clone().map(Value::String),
278            "tags" => {
279                if self.tags.is_empty() {
280                    None
281                } else {
282                    Some(Value::Sequence(
283                        self.tags.iter().cloned().map(Value::String).collect(),
284                    ))
285                }
286            }
287            _ => self.extra.get(key).cloned(),
288        }
289    }
290
291    /// Set a single frontmatter key from a string value, routing universal-
292    /// contract keys to their typed fields and everything else to
293    /// [`extra`](Frontmatter::extra). Used by `dbmd fm set`.
294    pub fn set(&mut self, key: &str, value: &str) -> Result<(), ParseError> {
295        match key {
296            "type" => self.type_ = Some(value.to_string()),
297            "id" => self.id = Some(value.to_string()),
298            "created" => {
299                self.created = Some(parse_rfc3339(value, "created", Path::new("<fm set>"))?)
300            }
301            "updated" => {
302                self.updated = Some(parse_rfc3339(value, "updated", Path::new("<fm set>"))?)
303            }
304            "summary" => self.summary = Some(value.to_string()),
305            "status" => self.status = Some(value.to_string()),
306            "tags" => {
307                // Accept either a YAML flow list (`[a, b]`) or a single scalar
308                // tag. Anything that parses to a sequence becomes the tag list;
309                // otherwise the whole string is one tag.
310                self.tags = match serde_norway::from_str::<Value>(value) {
311                    Ok(Value::Sequence(seq)) => parse_tags(&Value::Sequence(seq)),
312                    _ => vec![value.to_string()],
313                };
314            }
315            _ => {
316                // A custom / type-specific field. The value is a scalar string by
317                // default, but the spec's list-valued link fields (e.g.
318                // `meeting.attendees`, SPEC § Linking) must serialize as a YAML
319                // block sequence of quoted wiki-links — never the flow-form string
320                // `"[[[a]], [[b]]]"`, which `dbmd validate` rejects as
321                // `WIKI_LINK_FLOW_FORM_LIST`. When the value parses as a YAML
322                // sequence whose every item is a clean single wiki-link, store the
323                // canonical sequence so `to_yaml` emits block form. Everything else
324                // — plain text, and a single inline `[[x]]` (which YAML reads as a
325                // nested `Seq[Seq[String]]`, not a list of link strings) — stays a
326                // verbatim scalar string, preserving the prior behavior.
327                let stored = parse_link_list_value(value)
328                    .unwrap_or_else(|| Value::String(value.to_string()));
329                self.extra.insert(key.to_string(), stored);
330            }
331        }
332        Ok(())
333    }
334
335    /// Extract every frontmatter field whose value is a wiki-link (scalar
336    /// inline form or a block-sequence list), pairing each with its key. The
337    /// validate engine checks these against `(link)` schema annotations.
338    pub fn link_fields(&self) -> Vec<(String, WikiLink)> {
339        let mut out = Vec::new();
340        // `summary` may carry navigational wiki-links (spec encourages it).
341        if let Some(summary) = &self.summary {
342            for link in extract_wiki_links(summary, Path::new("")) {
343                out.push(("summary".to_string(), link));
344            }
345        }
346        // Every type-specific / custom field: a scalar wiki-link or a list of
347        // wiki-links, in either the quoted (`"[[x]]"`) or the canonical unquoted
348        // (`[[x]]`) form. See [`links_in_field_value`] for the YAML shapes.
349        for (key, value) in &self.extra {
350            for link in links_in_field_value(value) {
351                out.push((key.clone(), link));
352            }
353        }
354        out
355    }
356}
357
/// An in-store wiki-link reference, written `[[target]]` or
/// `[[target|display]]`.
///
/// The `target` is stored exactly as written. Whether it is a full
/// store-relative path (the doctrine) or a short form (a validation error) is
/// recorded in [`is_full_path`](WikiLink::is_full_path).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiLink {
    /// Link target as written, with the `[[ ]]` and any `|display` removed.
    pub target: String,
    /// Display-text override following `|`, when one was given.
    pub display: Option<String>,
    /// Set when `target` is a full store-relative path (it contains a `/` and
    /// resolves under a known layer). Unset for short-form targets such as
    /// `sarah-chen`, which validate reports as `WIKI_LINK_SHORT_FORM`.
    pub is_full_path: bool,
    /// Set when `target` ends in `.md`. Validate warns
    /// `WIKI_LINK_HAS_EXTENSION`; the canonical writers emit the bare form.
    pub has_md_extension: bool,
    /// Position of the link as `(file, line, col)`; line and column are
    /// 1-based.
    pub location: (PathBuf, u32, u32),
}
379
/// A standard markdown link, `[text](url)`, i.e. an external reference.
///
/// These are collected in a stream of their own, apart from [`WikiLink`], so
/// external targets remain visible to the toolkit without being mixed up with
/// in-store edges. They are not graph-validated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownLink {
    /// Text between the `[ ]`.
    pub text: String,
    /// URL or path between the `( )`.
    pub url: String,
    /// Position of the link as `(file, line, col)`, 1-based.
    pub location: (PathBuf, u32, u32),
}
392
/// One `##`/`###` section of a markdown body.
///
/// Holds the heading text together with the part of the body the section
/// covers: from the heading line up to (but not including) the next heading of
/// equal or shallower depth.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Section {
    /// Heading text with the leading `#`s removed.
    pub heading: String,
    /// Heading depth, i.e. how many leading `#`s it has.
    pub level: u8,
    /// 1-based line number of the heading.
    pub line: u32,
    /// Section text copied from the original body: starts at the heading line
    /// and stops before the next sibling-or-shallower heading.
    pub body: String,
}
408
409/// The parsed structured content of a store's `DB.md` config file.
410///
411/// All four parts are optional in the source; absent parts fall back to spec
412/// defaults. Produced by [`parse_db_md`].
413#[derive(Debug, Clone, Default, PartialEq)]
414pub struct Config {
415    /// Body of the `## Agent instructions` section — free-form prose passed to
416    /// the agent's system prompt.
417    pub agent_instructions: Option<String>,
418    /// `## Policies` → `### Frozen pages`: store-relative paths the toolkit
419    /// refuses to write (`POLICY_FROZEN_PAGE`).
420    pub frozen_pages: Vec<PathBuf>,
421    /// `## Policies` → `### Ignored types`: type names the curator never
422    /// synthesizes (still readable as ambient context).
423    pub ignored_types: Vec<String>,
424    /// `## Schemas` → one entry per `### <type>` sub-section.
425    pub schemas: BTreeMap<String, Schema>,
426}
427
428impl Config {
429    /// The `### Frozen pages` entry that matches a store-relative `target`, if
430    /// any. The **single** frozen-page matcher every write surface must funnel
431    /// through so the policy is enforced identically on `write` / `fm set` /
432    /// `fm init` / `link` / `rename` / `format`.
433    ///
434    /// Comparison is normalized so a policy line and a write target match
435    /// regardless of incidental spelling differences:
436    /// - `/` path separators on every OS,
437    /// - a single leading `./` dropped,
438    /// - a trailing `.md` dropped on **both** sides — `parse_db_md` stores
439    ///   frozen entries verbatim, so an operator who writes the natural
440    ///   extensionless spelling (`records/decisions/q1`) must protect the file
441    ///   (`records/decisions/q1.md`) exactly as the `.md` spelling does.
442    ///
443    /// Returns the matched config entry verbatim (its original spelling) so the
444    /// caller can name it in the `POLICY_FROZEN_PAGE` refusal.
445    pub fn frozen_match(&self, target: &Path) -> Option<PathBuf> {
446        let want = normalize_frozen_path(target);
447        self.frozen_pages
448            .iter()
449            .find(|frozen| normalize_frozen_path(frozen) == want)
450            .cloned()
451    }
452
453    /// True if `target` (store-relative) is a frozen page. Convenience wrapper
454    /// over [`Config::frozen_match`] for callers that only need presence.
455    pub fn is_frozen(&self, target: &Path) -> bool {
456        self.frozen_match(target).is_some()
457    }
458}
459
/// Bring a path into the canonical spelling used for frozen-page comparison.
///
/// The path's components are joined with `/`, one leading `./` is removed, and
/// a trailing `.md` is removed. Policy entries and write targets both go
/// through this before being compared, which makes the match insensitive to
/// separators, `./`, and `.md`. Components that are not valid UTF-8 are
/// skipped.
fn normalize_frozen_path(p: &Path) -> String {
    let mut joined = String::new();
    let parts = p.components().filter_map(|c| c.as_os_str().to_str());
    for (idx, part) in parts.enumerate() {
        if idx > 0 {
            joined.push('/');
        }
        joined.push_str(part);
    }

    let trimmed = joined.strip_prefix("./").unwrap_or(&joined);
    let trimmed = trimmed.strip_suffix(".md").unwrap_or(trimmed);
    trimmed.to_owned()
}
473
474/// A user-declared type schema parsed from a `DB.md` `### <type>` sub-section.
475/// The store's `## Schemas` is the **only** source of schema enforcement — the
476/// toolkit ships no built-in or implicit per-type schema (see SPEC § Schemas).
477#[derive(Debug, Clone, Default, PartialEq)]
478pub struct Schema {
479    /// One [`FieldSpec`] per bulleted field line, in source order.
480    pub fields: Vec<FieldSpec>,
481    /// `- unique: <field>[, <field> …]` directives — each inner vec is one
482    /// uniqueness constraint over the listed field(s) (compound when >1). Two
483    /// records of this type whose listed values collide warn as
484    /// `DUP_UNIQUE_KEY`.
485    pub unique_keys: Vec<Vec<String>>,
486    /// `- summary_template: <template>` directive — the `{field}` interpolation
487    /// pattern `dbmd fm init` / `dbmd write` use to compose a default `summary`
488    /// for this type. `None` falls back to the body's first paragraph.
489    pub summary_template: Option<String>,
490    /// `- shard: by-date | flat` directive — whether records of this type are
491    /// date-sharded on disk (`records/<type>/<YYYY>/<MM>/…`) or kept flat.
492    /// `None` = no directive declared, so the store's built-in default for the
493    /// type applies ([`crate::store::Store::type_shards`]); `Some(true)` forces
494    /// date-sharding (e.g. a custom event type the toolkit has no built-in for);
495    /// `Some(false)` forces flat. This is the v0.2 generic-model way to declare
496    /// sharding — the toolkit ships no implicit per-type behavior beyond the
497    /// example-type defaults.
498    pub shard: Option<bool>,
499}
500
501/// One field declaration inside a [`Schema`]: `- <name> (<modifiers>)`.
502///
503/// Modifiers are comma-separated inside the parens; this captures the
504/// recognized ones as typed fields and stashes anything unrecognized in
505/// [`unknown_modifiers`](FieldSpec::unknown_modifiers) (surfaced as `Info`).
506#[derive(Debug, Clone, Default, PartialEq)]
507pub struct FieldSpec {
508    /// The field name.
509    pub name: String,
510    /// `required` modifier present.
511    pub required: bool,
512    /// The shape modifier (`string`/`int`/`bool`/`date`/`email`/`currency`/
513    /// `url`), if any.
514    pub shape: Option<Shape>,
515    /// `link to <prefix>/` — the store-relative prefix a wiki-link target must
516    /// start with. The trailing slash is required in the source syntax.
517    pub link_prefix: Option<PathBuf>,
518    /// `default <value>` — the value written when the field is absent.
519    pub default: Option<Value>,
520    /// `enum: <v1>, <v2>, ...` — the allowed values (must be the last modifier
521    /// on the line because of its own commas).
522    pub enum_values: Option<Vec<String>>,
523    /// Any modifiers not in the recognized vocabulary, preserved verbatim;
524    /// validate surfaces these as `Info`, never errors.
525    pub unknown_modifiers: Vec<String>,
526}
527
/// The shape modifiers a schema field may carry. Validate checks that the
/// field's value has the matching shape and reports `SCHEMA_SHAPE_MISMATCH`
/// when it does not.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Shape {
    /// A scalar string of any content.
    String,
    /// An integer.
    Int,
    /// A boolean.
    Bool,
    /// A date in RFC3339 / ISO-8601 form.
    Date,
    /// An email address of the form `<local>@<domain>`.
    Email,
    /// An amount of currency.
    Currency,
    /// A URL.
    Url,
}
547
/// A raw file split into its frontmatter block and its body.
///
/// `body` is everything after the closing `---` fence, untouched. The writer
/// keeps it byte-for-byte so operator edits are never reflowed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFile {
    /// Raw frontmatter YAML: the text between the two fences, fences excluded.
    pub frontmatter_yaml: String,
    /// The body, verbatim: all text following the closing `---`.
    pub body: String,
}
559
560/// Split a file's full text into its frontmatter block and body. The
561/// frontmatter block must be the very first thing in the file, delimited by
562/// `---` on its own line at start and end. Returns
563/// [`ParseError::MissingFrontmatter`] if absent.
564pub fn split_frontmatter(text: &str, file: &Path) -> Result<ParsedFile, ParseError> {
565    // The opening fence must be the very first line: `---` (optionally with a
566    // trailing CR), no leading whitespace, nothing before it.
567    let mut lines = text.split_inclusive('\n');
568    let first = lines.next().unwrap_or("");
569    if first.trim_end_matches(['\r', '\n']) != "---" {
570        return Err(ParseError::MissingFrontmatter {
571            file: file.to_path_buf(),
572        });
573    }
574
575    // Scan for the closing fence line. Track byte offsets so we can slice the
576    // YAML (between fences, exclusive) and the body (verbatim, after the
577    // closing fence's line terminator).
578    let opening_len = first.len();
579    let mut offset = opening_len;
580    for line in lines {
581        if line.trim_end_matches(['\r', '\n']) == "---" {
582            let yaml = &text[opening_len..offset];
583            let body_start = offset + line.len();
584            let body = &text[body_start..];
585            return Ok(ParsedFile {
586                frontmatter_yaml: yaml.to_string(),
587                body: body.to_string(),
588            });
589        }
590        offset += line.len();
591    }
592
593    // Opening fence present but no closing fence: malformed frontmatter block.
594    Err(ParseError::MissingFrontmatter {
595        file: file.to_path_buf(),
596    })
597}
598
599/// Read a file from disk and parse it into typed [`Frontmatter`] plus the
600/// verbatim body string.
601pub fn read_file(path: &Path) -> Result<(Frontmatter, String), ParseError> {
602    let text = std::fs::read_to_string(path)?;
603    let parsed = split_frontmatter(&text, path)?;
604    let fm = Frontmatter::parse(&parsed.frontmatter_yaml, path)?;
605    Ok((fm, parsed.body))
606}
607
608/// Atomically write a markdown file from frontmatter + body: emit the
609/// frontmatter in canonical key order, then the body verbatim, via a
610/// temp-file-rename so a reader never sees a half-written file. Preserves the
611/// operator-edited body exactly as given.
612pub fn write_file(path: &Path, frontmatter: &Frontmatter, body: &str) -> Result<(), ParseError> {
613    let yaml = frontmatter.to_yaml();
614    // `to_yaml` already terminates each block with a newline. Compose the file
615    // as: opening fence, frontmatter YAML, closing fence, then body verbatim.
616    let mut contents = String::with_capacity(yaml.len() + body.len() + 8);
617    contents.push_str("---\n");
618    contents.push_str(&yaml);
619    contents.push_str("---\n");
620    contents.push_str(body);
621
622    // One durable, atomic write for all primary data (see `crate::fsx`):
623    // temp-file + fsync + rename + parent-fsync. Content records are primary
624    // data, so they get the durable path (unlike the rebuildable index).
625    crate::fsx::write_atomic(path, contents.as_bytes())?;
626    Ok(())
627}
628
629/// Extract every wiki-link from a body (and inline frontmatter), returning the
630/// structured [`WikiLink`] stream with short-form / `.md`-extension flags and
631/// `(file, line, col)` locations set.
632pub fn extract_wiki_links(body: &str, file: &Path) -> Vec<WikiLink> {
633    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
634    let re = RE.get_or_init(|| {
635        // [[target]] or [[target|display]]; target/display exclude brackets and
636        // (for target) the `|` separator so nested forms don't over-match.
637        regex::Regex::new(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]*))?\]\]").expect("valid wiki-link regex")
638    });
639
640    let mut out = Vec::new();
641    for (line_idx, line) in body.lines().enumerate() {
642        for caps in re.captures_iter(line) {
643            let whole = caps.get(0).expect("group 0 always present");
644            let target = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
645            let display = caps.get(2).map(|m| m.as_str().to_string());
646            out.push(WikiLink {
647                is_full_path: target_is_full_path(&target),
648                has_md_extension: target_has_md_extension(&target),
649                target,
650                display,
651                location: (
652                    file.to_path_buf(),
653                    (line_idx as u32) + 1,
654                    char_column(line, whole.start()),
655                ),
656            });
657        }
658    }
659    out
660}
661
662/// Extract every standard markdown link `[text](url)` from a body into a
663/// separate stream, kept distinct from wiki-links.
664pub fn extract_markdown_links(body: &str, file: &Path) -> Vec<MarkdownLink> {
665    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
666    let re = RE.get_or_init(|| {
667        // [text](url). `text` excludes brackets so a wiki-link `[[x]]` (which
668        // has `]]`, not `](`) never matches; `url` excludes `)` and whitespace.
669        regex::Regex::new(r"\[([^\[\]]*)\]\(([^)\s]*)\)").expect("valid markdown-link regex")
670    });
671
672    let mut out = Vec::new();
673    for (line_idx, line) in body.lines().enumerate() {
674        for caps in re.captures_iter(line) {
675            let whole = caps.get(0).expect("group 0 always present");
676            out.push(MarkdownLink {
677                text: caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(),
678                url: caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(),
679                location: (
680                    file.to_path_buf(),
681                    (line_idx as u32) + 1,
682                    char_column(line, whole.start()),
683                ),
684            });
685        }
686    }
687    out
688}
689
690/// Detect the frontmatter wiki-link-list mis-encoding: a wiki-link *list*
691/// written so YAML parses it as nested sequences instead of a clean list of
692/// strings. Returns the offending keys so validate can emit
693/// `WIKI_LINK_FLOW_FORM_LIST`.
694///
695/// The subtlety is that `[[x]]` is YAML for "a list containing `[x]`", so the
696/// shapes nest:
697///
698/// - **Scalar inline** `company: [[records/x]]` → `Seq[ Seq[String] ]`
699///   (double-nested). This is the spec's scalar wiki-link form — NOT flagged.
700/// - **Flow list** `attendees: [[[a]], [[b]]]` → `Seq[ Seq[Seq[String]], … ]`
701///   (triple-nested). The list mis-encoding — flagged.
702/// - **Unquoted block list** (`- [[a]]` per line) → also triple-nested, so it
703///   is flagged too; the canonical list form must quote each item
704///   (`- "[[a]]"`), which parses to a clean `Seq[String, …]` and is NOT flagged.
705///
706/// So the discriminator is nesting depth: a *list* mis-encoding has at least one
707/// item that is itself a sequence-of-sequences, whereas a scalar inline link's
708/// single item is a sequence-of-scalars.
709pub fn detect_flow_form_link_lists(frontmatter_yaml: &str) -> Vec<String> {
710    let value: Value = match serde_norway::from_str(frontmatter_yaml) {
711        Ok(v) => v,
712        // Malformed YAML is FM_MALFORMED_YAML's job, not ours; report nothing.
713        Err(_) => return Vec::new(),
714    };
715    let Value::Mapping(map) = value else {
716        return Vec::new();
717    };
718
719    let mut out = Vec::new();
720    for (k, v) in &map {
721        if let Value::Sequence(items) = v {
722            // Triple-nesting: some outer item is a sequence that itself holds a
723            // sequence. Scalar inline `[[x]]` is only double-nested, so it
724            // never matches.
725            let is_link_list = items.iter().any(|item| match item {
726                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
727                _ => false,
728            });
729            if is_link_list {
730                if let Some(key) = k.as_str() {
731                    out.push(key.to_string());
732                }
733            }
734        }
735    }
736    out
737}
738
739/// Extract the `##`/`###` sections of a markdown body into a flat list with
740/// body slices.
741pub fn extract_sections(body: &str) -> Vec<Section> {
742    // Keep each line's start so we can slice the body verbatim (exact newlines).
743    let lines: Vec<&str> = body.split_inclusive('\n').collect();
744
745    // First pass: classify heading levels (0 = not a heading), honoring fenced
746    // code blocks so a `## x` inside a ``` fence is not treated as a heading.
747    let mut levels: Vec<u8> = Vec::with_capacity(lines.len());
748    let mut fence: Option<(u8, usize)> = None;
749    for line in &lines {
750        let content = line.trim_end_matches(['\n', '\r']);
751        if let Some(f) = fence {
752            if is_closing_fence(content, f) {
753                fence = None;
754            }
755            levels.push(0);
756            continue;
757        }
758        if let Some(opened) = opening_fence(content) {
759            fence = Some(opened);
760            levels.push(0);
761            continue;
762        }
763        levels.push(heading_level(content));
764    }
765
766    // Second pass: emit `##`+ headings; each section body runs from its heading
767    // line to the next heading at an equal-or-shallower level (exclusive).
768    let mut sections = Vec::new();
769    for (i, &lvl) in levels.iter().enumerate() {
770        if lvl < 2 {
771            continue;
772        }
773        let heading_line = lines[i].trim_end_matches(['\n', '\r']);
774        let heading = heading_text(heading_line, lvl);
775
776        let mut end = lines.len();
777        for (j, &other) in levels.iter().enumerate().skip(i + 1) {
778            if other != 0 && other <= lvl {
779                end = j;
780                break;
781            }
782        }
783
784        sections.push(Section {
785            heading,
786            level: lvl,
787            line: (i + 1) as u32,
788            body: lines[i..end].concat(),
789        });
790    }
791    sections
792}
793
794/// Parse a store's `DB.md` file into a [`Config`]: the `## Agent instructions`
795/// prose, `## Policies` (`### Frozen pages` + `### Ignored types`), and
796/// `## Schemas` (`### <type>` field-bullet blocks). Unrecognized sections are
797/// ignored; absent sections leave their [`Config`] fields at default.
798pub fn parse_db_md(text: &str, file: &Path) -> Result<Config, ParseError> {
799    // The structured sections live in the body (after frontmatter). DB.md must
800    // still start with a valid `---` block (`type: db-md`); if it's missing we
801    // surface MissingFrontmatter like any other file.
802    let parsed = split_frontmatter(text, file)?;
803    let _frontmatter = Frontmatter::parse(&parsed.frontmatter_yaml, file)?;
804    let sections = extract_sections(&parsed.body);
805
806    let mut config = Config::default();
807    // Track which H2 region each H3 belongs to as we walk the flat list.
808    let mut current_h2: Option<String> = None;
809
810    for section in &sections {
811        match section.level {
812            2 => {
813                let name = section.heading.trim().to_ascii_lowercase();
814                current_h2 = Some(name.clone());
815                if name == "agent instructions" {
816                    let prose = section_prose(&section.body);
817                    if !prose.is_empty() {
818                        config.agent_instructions = Some(prose);
819                    }
820                }
821            }
822            3 => {
823                let h2 = current_h2.as_deref().unwrap_or("");
824                let h3 = section.heading.trim().to_ascii_lowercase();
825                match (h2, h3.as_str()) {
826                    ("policies", "frozen pages") => {
827                        config.frozen_pages = bullet_lines(&section.body)
828                            .into_iter()
829                            .map(|b| PathBuf::from(extract_path_bullet(&b)))
830                            .collect();
831                    }
832                    ("policies", "ignored types") => {
833                        config.ignored_types = bullet_lines(&section.body)
834                            .into_iter()
835                            .flat_map(|b| extract_type_list_bullet(&b))
836                            .collect();
837                    }
838                    ("schemas", _) => {
839                        // The H3 heading text (as written) is the type name.
840                        let type_name = section.heading.trim().to_string();
841                        let mut schema = Schema::default();
842                        for b in bullet_lines(&section.body) {
843                            match parse_schema_bullet(&b) {
844                                SchemaBullet::Field(f) => schema.fields.push(f),
845                                SchemaBullet::Unique(k) if !k.is_empty() => {
846                                    schema.unique_keys.push(k)
847                                }
848                                SchemaBullet::SummaryTemplate(t) if !t.is_empty() => {
849                                    schema.summary_template = Some(t)
850                                }
851                                SchemaBullet::Shard(Some(b)) => schema.shard = Some(b),
852                                // Empty `unique:`/`summary_template:`, or a `shard:`
853                                // with an unrecognized value — ignored.
854                                SchemaBullet::Unique(_)
855                                | SchemaBullet::SummaryTemplate(_)
856                                | SchemaBullet::Shard(None) => {}
857                            }
858                        }
859                        config.schemas.insert(type_name, schema);
860                    }
861                    _ => {}
862                }
863            }
864            _ => {}
865        }
866    }
867
868    Ok(config)
869}
870
871/// One parsed bullet inside a `### <type>` schema block: an ordinary field, or a
872/// reserved directive (`unique:` / `summary_template:` / `shard:`). The names
873/// `unique`, `summary_template`, and `shard` are reserved and cannot be used as
874/// field names.
875#[derive(Debug)]
876enum SchemaBullet {
877    /// An ordinary `- <name> (<modifiers>)` field.
878    Field(FieldSpec),
879    /// `- unique: <field>[, <field> …]` — a (possibly compound) uniqueness key.
880    Unique(Vec<String>),
881    /// `- summary_template: <template>` — the default-`summary` pattern.
882    SummaryTemplate(String),
883    /// `- shard: by-date | flat` — date-shard records of this type, or keep them
884    /// flat. `None` = an unrecognized value, ignored like an unknown modifier.
885    Shard(Option<bool>),
886}
887
888/// Classify one `## Schemas` bullet as a directive or a field. The directive
889/// forms are `- unique: a, b, …` and `- summary_template: …`; the keyword check
890/// guards against false positives — a field like `- status (enum: a, b)` has a
891/// `(` before any `:`, so its head isn't a bare reserved keyword and it parses
892/// as a [`FieldSpec`].
893fn parse_schema_bullet(bullet_line: &str) -> SchemaBullet {
894    let line = bullet_line.trim();
895    let line = line
896        .strip_prefix("- ")
897        .or_else(|| line.strip_prefix("* "))
898        .or_else(|| line.strip_prefix("+ "))
899        .or_else(|| line.strip_prefix('-'))
900        .unwrap_or(line)
901        .trim();
902
903    if let Some((head, rest)) = line.split_once(':') {
904        match head.trim().to_ascii_lowercase().as_str() {
905            "unique" => {
906                let fields = rest
907                    .split(',')
908                    .map(|f| f.trim().to_string())
909                    .filter(|f| !f.is_empty())
910                    .collect();
911                return SchemaBullet::Unique(fields);
912            }
913            "summary_template" => {
914                return SchemaBullet::SummaryTemplate(rest.trim().to_string());
915            }
916            "shard" => {
917                // `by-date` (synonyms: date/sharded/true) enables date-sharding;
918                // `flat` (none/false) forces flat; anything else is ignored.
919                let v = match rest.trim().to_ascii_lowercase().as_str() {
920                    "by-date" | "date" | "sharded" | "true" => Some(true),
921                    "flat" | "none" | "false" => Some(false),
922                    _ => None,
923                };
924                return SchemaBullet::Shard(v);
925            }
926            _ => {}
927        }
928    }
929
930    SchemaBullet::Field(parse_field_spec(bullet_line))
931}
932
933/// Parse a single `## Schemas` field-bullet line — `- <name> (<modifiers>)` —
934/// into a [`FieldSpec`], capturing recognized modifiers and stashing the rest
935/// in [`FieldSpec::unknown_modifiers`].
936pub fn parse_field_spec(bullet_line: &str) -> FieldSpec {
937    // Strip the leading bullet marker (`- ` / `* ` / `+ `) and surrounding ws.
938    let line = bullet_line.trim();
939    let line = line
940        .strip_prefix("- ")
941        .or_else(|| line.strip_prefix("* "))
942        .or_else(|| line.strip_prefix("+ "))
943        .or_else(|| line.strip_prefix('-'))
944        .unwrap_or(line)
945        .trim();
946
947    // Split `<name> (<modifiers>)`. A bullet without parens is a free-form
948    // optional field of any shape — name only, no modifiers.
949    let (name, modifiers) = match line.find('(') {
950        Some(open) => {
951            let name = line[..open].trim().to_string();
952            let after = &line[open + 1..];
953            let mods = match after.rfind(')') {
954                Some(close) => &after[..close],
955                None => after, // tolerate a missing close paren
956            };
957            (name, mods.trim())
958        }
959        None => (line.to_string(), ""),
960    };
961
962    let mut spec = FieldSpec {
963        name,
964        ..FieldSpec::default()
965    };
966
967    if modifiers.is_empty() {
968        return spec;
969    }
970
971    // Modifiers are comma-separated. `enum:` is special: because its own value
972    // list contains commas, it must be last and swallows the remainder.
973    let raw: Vec<&str> = modifiers.split(',').collect();
974    let mut i = 0;
975    while i < raw.len() {
976        let token = raw[i].trim();
977        if token.is_empty() {
978            i += 1;
979            continue;
980        }
981        let lower = token.to_ascii_lowercase();
982
983        if lower == "required" {
984            spec.required = true;
985        } else if let Some(shape) = shape_from_str(&lower) {
986            spec.shape = Some(shape);
987        } else if let Some(rest) = lower.strip_prefix("link to ") {
988            // The trailing slash is required in the source; store the prefix
989            // without it so `Path::starts_with` comparisons are clean.
990            let prefix = token["link to ".len()..].trim().trim_end_matches('/');
991            let _ = rest; // lowercase form only used for the keyword match
992            spec.link_prefix = Some(PathBuf::from(prefix));
993        } else if let Some(_rest) = lower.strip_prefix("default ") {
994            // Value is everything after the keyword on this comma-token,
995            // preserving original case.
996            let value = token["default ".len()..].trim().to_string();
997            spec.default = Some(Value::String(value));
998        } else if lower == "enum" {
999            // Bare `enum` keyword (`enum, open, closed`): the values are the
1000            // REMAINING tokens — the keyword itself must not leak in as a value.
1001            let values: Vec<String> = raw[i + 1..]
1002                .iter()
1003                .map(|v| v.trim().to_string())
1004                .filter(|v| !v.is_empty())
1005                .collect();
1006            spec.enum_values = Some(values);
1007            break; // enum consumed the rest of the line
1008        } else if lower.starts_with("enum:") {
1009            // `enum: open, closed` form: rejoin this token and the rest, then
1010            // drop everything up to and including the `:`.
1011            let mut joined = raw[i..].join(",");
1012            if let Some(colon) = joined.find(':') {
1013                joined = joined[colon + 1..].to_string();
1014            }
1015            let values: Vec<String> = joined
1016                .split(',')
1017                .map(|v| v.trim().to_string())
1018                .filter(|v| !v.is_empty())
1019                .collect();
1020            spec.enum_values = Some(values);
1021            break; // enum consumed the rest of the line
1022        } else {
1023            // Unrecognized modifier — captured verbatim, surfaced as Info.
1024            spec.unknown_modifiers.push(token.to_string());
1025        }
1026        i += 1;
1027    }
1028
1029    spec
1030}
1031
1032// ── Private helpers ─────────────────────────────────────────────────────────
1033
1034/// Parse a frontmatter timestamp value into a `DateTime<FixedOffset>`. A `null`
1035/// is treated as absent; anything else must be an RFC3339 string.
1036fn parse_timestamp(
1037    value: &Value,
1038    key: &str,
1039    file: &Path,
1040) -> Result<Option<DateTime<FixedOffset>>, ParseError> {
1041    match value {
1042        Value::Null => Ok(None),
1043        Value::String(s) => parse_rfc3339(s, key, file).map(Some),
1044        other => Err(ParseError::BadTimestamp {
1045            file: file.to_path_buf(),
1046            key: key.to_string(),
1047            value: format!("{other:?}"),
1048        }),
1049    }
1050}
1051
1052/// Parse an RFC3339 timestamp string, mapping failure to [`ParseError::BadTimestamp`].
1053fn parse_rfc3339(s: &str, key: &str, file: &Path) -> Result<DateTime<FixedOffset>, ParseError> {
1054    DateTime::parse_from_rfc3339(s.trim()).map_err(|_| ParseError::BadTimestamp {
1055        file: file.to_path_buf(),
1056        key: key.to_string(),
1057        value: s.to_string(),
1058    })
1059}
1060
1061/// Read a `tags` value into a flat `Vec<String>`. Accepts a sequence of scalars
1062/// (the canonical form) or a single scalar (coerced to a one-element list).
1063fn parse_tags(value: &Value) -> Vec<String> {
1064    match value {
1065        Value::Sequence(items) => items
1066            .iter()
1067            .filter_map(|v| match v {
1068                Value::String(s) => Some(s.clone()),
1069                Value::Number(n) => Some(n.to_string()),
1070                Value::Bool(b) => Some(b.to_string()),
1071                _ => None,
1072            })
1073            .collect(),
1074        Value::String(s) => vec![s.clone()],
1075        _ => Vec::new(),
1076    }
1077}
1078
1079/// Parse a single `[[target|display]]` string into a [`WikiLink`] with no
1080/// location, or `None` if the string is not a bare wiki-link. Used for
1081/// frontmatter-valued links where there is no body position to report.
1082fn parse_wiki_link_str(s: &str) -> Option<WikiLink> {
1083    let s = s.trim();
1084    let inner = s.strip_prefix("[[")?.strip_suffix("]]")?;
1085    // Reject anything with further brackets (e.g. the nested flow-form item),
1086    // which is not a clean single wiki-link.
1087    if inner.contains('[') || inner.contains(']') {
1088        return None;
1089    }
1090    let (target, display) = match inner.split_once('|') {
1091        Some((t, d)) => (t.to_string(), Some(d.to_string())),
1092        None => (inner.to_string(), None),
1093    };
1094    Some(WikiLink {
1095        is_full_path: target_is_full_path(&target),
1096        has_md_extension: target_has_md_extension(&target),
1097        target,
1098        display,
1099        location: (PathBuf::new(), 0, 0),
1100    })
1101}
1102
1103/// Extract every wiki-link from a single frontmatter field value, accepting the
1104/// two canonical forms the spec defines (SPEC § Linking):
1105///
1106/// - a **scalar** wiki-link field, in either the quoted (`f: "[[x]]"`) or the
1107///   canonical unquoted inline (`f: [[x]]`) form, and
1108/// - a **list** field whose items are quoted wiki-link strings
1109///   (`- "[[x]]"`).
1110///
1111/// YAML eats the brackets of an unquoted `[[x]]`, leaving a flow-list-in-a-list,
1112/// so the parsed [`Value`] shapes are not what one would naively expect:
1113///
1114/// | source                         | parsed `Value`                     | here |
1115/// |--------------------------------|------------------------------------|------|
1116/// | `f: "[[x]]"`       (quoted)    | `String("[[x]]")`                  | link |
1117/// | `f: [[x]]`         (unquoted)  | `Seq[ Seq[String("x")] ]`          | link |
1118/// | `f:`\n`  - "[[x]]"`(quoted)    | `Seq[ String("[[x]]"), … ]`        | link |
1119/// | `f:`\n`  - [[x]]`  (unquoted)  | `Seq[ Seq[Seq[String("x")]], … ]`  | —    |
1120///
1121/// The last row — an *unquoted list* — parses identically to the flow-form list
1122/// `f: [[a], [b]]` and is a mis-encoding the canonical writer never emits;
1123/// `dbmd validate` reports it as `WIKI_LINK_FLOW_FORM_LIST` (see
1124/// [`detect_flow_form_link_lists`]). It is deliberately NOT surfaced here, so an
1125/// edge enumerator only ever sees the valid canonical forms.
1126///
1127/// The unquoted scalar (`Seq[Seq[String]]`, one element) is told apart from a
1128/// plain one-item flow list (`f: [x]` → `Seq[String]`, one fewer nesting level)
1129/// by [`unquoted_inline_link`] requiring its argument to be a `Sequence`.
1130fn links_in_field_value(value: &Value) -> Vec<WikiLink> {
1131    // Quoted scalar: `field: "[[x]]"`.
1132    if let Value::String(s) = value {
1133        return parse_wiki_link_str(s).into_iter().collect();
1134    }
1135    let Value::Sequence(items) = value else {
1136        return Vec::new();
1137    };
1138    // Unquoted scalar inline form `field: [[x]]` → `Seq[ Seq[String(x)] ]`.
1139    // (A quoted single-item list `["[[x]]"]` is `Seq[String]`, so its lone item
1140    // is a `String`, not a `Sequence`, and falls through to the list path below.)
1141    if items.len() == 1 {
1142        if let Some(link) = unquoted_inline_link(&items[0]) {
1143            return vec![link];
1144        }
1145    }
1146    // Otherwise a list of quoted wiki-link strings; non-string items (the
1147    // unquoted-list mis-encoding) are left for validate to flag.
1148    items
1149        .iter()
1150        .filter_map(|item| parse_wiki_link_str(item.as_str()?))
1151        .collect()
1152}
1153
1154/// Canonicalize one `extra` frontmatter value for emission by [`Frontmatter::to_yaml`].
1155///
1156/// The read path ([`Frontmatter::parse`]) stores every unknown key's raw parsed
1157/// [`Value`] verbatim, so a SPEC-canonical *unquoted* inline scalar wiki-link
1158/// (`company: [[records/companies/northstar]]`) lands in `extra` as the nested
1159/// shape YAML produces for it — `Seq[ Seq[String("records/companies/northstar")] ]`.
1160/// Re-emitting that verbatim yields the block sequence
1161///
1162/// ```text
1163/// company:
1164/// - - records/companies/northstar
1165/// ```
1166///
1167/// which has lost the `[[ ]]` brackets entirely: the link is destroyed, and every
1168/// reader (validate, graph, backlinks) stops seeing the edge. This normalizes such
1169/// a value back into the canonical emitted form before it is written:
1170///
1171/// - a **scalar** wiki-link (quoted `String("[[x]]")` or unquoted `Seq[Seq[String]]`,
1172///   one element) → a quoted scalar `Value::String("[[x]]")`, which serde_norway emits
1173///   inline as `'[[x]]'` — the form the finding confirms survives a round-trip and
1174///   that [`links_in_field_value`] reads back as the same scalar link;
1175/// - a **list** of wiki-links (in any spelling [`links_in_field_value`] accepts) →
1176///   a block `Value::Sequence` of quoted-link strings (`- "[[x]]"`), matching the
1177///   `set` write-in path and the canonical list form;
1178/// - everything else → returned verbatim (the common no-op for non-link values).
1179///
1180/// `|display` is preserved in both link branches. This is the single point that
1181/// keeps all three curator-loop writers (`format`, `fm set`, `link`) from
1182/// corrupting a pre-existing canonical link, since they all funnel through
1183/// `to_yaml`.
1184fn canonicalize_extra_value(value: &Value) -> Value {
1185    match value {
1186        // Scalar wiki-link, quoted form: `field: "[[x]]"` → `String("[[x]]")`.
1187        // Re-emit as a quoted scalar so it stays a string (never the brackets-as-
1188        // YAML nested sequence). Non-link strings are returned untouched.
1189        Value::String(s) => match parse_wiki_link_str(s) {
1190            Some(link) => Value::String(wiki_link_literal(&link)),
1191            None => value.clone(),
1192        },
1193        Value::Sequence(items) => {
1194            // Scalar wiki-link, unquoted inline form: `field: [[x]]` parses to a
1195            // one-element `Seq[ Seq[String(x)] ]`. Collapse back to the quoted
1196            // scalar string so the link is preserved rather than block-emitted.
1197            if items.len() == 1 {
1198                if let Some(link) = unquoted_inline_link(&items[0]) {
1199                    return Value::String(wiki_link_literal(&link));
1200                }
1201            }
1202            // List of wiki-links: re-emit as a block sequence of quoted-link
1203            // strings, the canonical list form `to_yaml` renders block-style and
1204            // `links_in_field_value` accepts. Only canonicalize when *every* item
1205            // is a clean single wiki-link; a list with any non-link item is left
1206            // verbatim so unrelated sequences (and the unquoted-list mis-encoding
1207            // validate flags) are untouched.
1208            let mut links = Vec::with_capacity(items.len());
1209            for item in items {
1210                match link_from_flow_list_item(item) {
1211                    Some(link) => links.push(link),
1212                    None => return value.clone(),
1213                }
1214            }
1215            if links.is_empty() {
1216                return value.clone();
1217            }
1218            Value::Sequence(
1219                links
1220                    .iter()
1221                    .map(|l| Value::String(wiki_link_literal(l)))
1222                    .collect(),
1223            )
1224        }
1225        // Mappings, scalars other than strings, nulls: nothing to canonicalize.
1226        _ => value.clone(),
1227    }
1228}
1229
1230/// Render a [`WikiLink`] back to its `[[target]]` / `[[target|display]]` literal,
1231/// the inner form the canonical writer emits and `links_in_field_value` accepts.
1232fn wiki_link_literal(link: &WikiLink) -> String {
1233    match &link.display {
1234        Some(d) => format!("[[{}|{}]]", link.target, d),
1235        None => format!("[[{}]]", link.target),
1236    }
1237}
1238
1239/// Recognize the inner token of an unquoted scalar `[[x]]`: after YAML strips the
1240/// outer brackets, the inner `[x]` is a single-element sequence `Seq[String(x)]`.
1241/// Reconstructs `[[x]]` (preserving any `|display`) and parses it, or returns
1242/// `None` when `v` is not that shape. Requiring a `Sequence` here is what keeps a
1243/// plain one-item flow list (`field: [x]` → `Seq[String]`, not `Seq[Seq[String]]`)
1244/// from being mistaken for a wiki-link.
1245fn unquoted_inline_link(v: &Value) -> Option<WikiLink> {
1246    let Value::Sequence(items) = v else {
1247        return None;
1248    };
1249    if items.len() != 1 {
1250        return None;
1251    }
1252    let s = items[0].as_str()?;
1253    // A clean unquoted wiki-link has no further brackets inside it.
1254    if s.contains('[') || s.contains(']') {
1255        return None;
1256    }
1257    parse_wiki_link_str(&format!("[[{s}]]"))
1258}
1259
1260/// Decide whether a `dbmd fm set` / `--fm` value string is a **list of
1261/// wiki-links** that should be stored as a YAML block sequence, returning the
1262/// canonical `Value::Sequence` of quoted-link strings when so.
1263///
1264/// The value path of every write surface stringifies its argument; without this
1265/// a required list-of-links field (`meeting.attendees`) was unwritable in valid
1266/// form — passing `[[[a]], [[b]]]` stored a single scalar string that mis-parses
1267/// and trips `WIKI_LINK_FLOW_FORM_LIST` / `WIKI_LINK_BROKEN`. This recognizes the
1268/// two list spellings an agent naturally types and normalizes both to the block
1269/// form the canonical writer emits and `dbmd validate` accepts:
1270///
1271/// - flow list of quoted links — `["[[a]]", "[[b]]"]`
1272/// - flow list of unquoted links — `[[[a]], [[b]]]` (YAML: `Seq[Seq[String], …]`)
1273///
1274/// Returns `None` (⇒ caller stores a verbatim scalar string) for everything that
1275/// is not unambiguously a list of clean wiki-links — plain text, a single inline
1276/// `[[x]]` (YAML reads it as a one-item `Seq[Seq[String]]`, kept scalar so it
1277/// renders inline), an empty list, or a list with any non-link item. A single
1278/// link must stay scalar; only genuine multi-item-or-explicit lists become
1279/// sequences, matching `links_in_field_value`'s acceptance rule so writer and
1280/// validator never disagree.
1281fn parse_link_list_value(value: &str) -> Option<Value> {
1282    let trimmed = value.trim();
1283    // Only a YAML *flow sequence* literal is a list candidate; anything not
1284    // wrapped in `[ … ]` is a scalar (a bare `[[x]]` is wrapped, and handled by
1285    // the single-inline-link guard below).
1286    if !(trimmed.starts_with('[') && trimmed.ends_with(']')) {
1287        return None;
1288    }
1289    let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(trimmed) else {
1290        return None;
1291    };
1292    // A single inline `[[x]]` parses to `Seq[ Seq[String(x)] ]` (one item, itself
1293    // a sequence) — that is the unquoted *scalar* form, not a list. Keep it scalar
1294    // so it round-trips to the inline `field: [[x]]` rather than a one-item block
1295    // list. `links_in_field_value` reads it back as a scalar link either way.
1296    if items.len() == 1 && unquoted_inline_link(&items[0]).is_some() {
1297        return None;
1298    }
1299    // Every item must resolve to exactly one clean wiki-link, in any of the flow
1300    // spellings an agent types (see [`link_from_flow_list_item`]).
1301    let mut links = Vec::with_capacity(items.len());
1302    for item in &items {
1303        links.push(link_from_flow_list_item(item)?);
1304    }
1305    if links.is_empty() {
1306        return None;
1307    }
1308    // Normalize to a block sequence of quoted-link strings — the form `to_yaml`
1309    // renders block-style and `links_in_field_value` accepts. `|display` is
1310    // preserved.
1311    let normalized = links
1312        .iter()
1313        .map(|l| Value::String(wiki_link_literal(l)))
1314        .collect();
1315    Some(Value::Sequence(normalized))
1316}
1317
1318/// Recognize one clean wiki-link from a single **item** of a YAML flow sequence,
1319/// across the spellings an agent types for a list. After top-level flow parsing,
1320/// a list item arrives in one of:
1321///
1322/// - quoted — `"[[x]]"` ⇒ `String("[[x]]")`
1323/// - unquoted in a flow list — `[[x]]` inside `[…]` ⇒ `Seq[ Seq[String(x)] ]`
1324///   (one level deeper than a bare unquoted scalar, because the surrounding list
1325///   adds a wrapper); unwrap the single-element wrapper, then read the inline
1326///   `Seq[String(x)]` with [`unquoted_inline_link`].
1327///
1328/// Returns `None` for any item that is not exactly one clean wiki-link, so the
1329/// caller falls back to a scalar string and never fabricates a partial list.
1330fn link_from_flow_list_item(item: &Value) -> Option<WikiLink> {
1331    match item {
1332        Value::String(s) => parse_wiki_link_str(s),
1333        Value::Sequence(inner) => {
1334            // Unquoted list item `[[x]]` → `Seq[ Seq[String(x)] ]`: peel the lone
1335            // wrapper to expose the inline-link shape.
1336            if inner.len() == 1 {
1337                if let Some(link) = unquoted_inline_link(&inner[0]) {
1338                    return Some(link);
1339                }
1340            }
1341            // Defensive: also accept the inline-link shape directly.
1342            unquoted_inline_link(item)
1343        }
1344        _ => None,
1345    }
1346}
1347
1348/// A target is a full store-relative path when its first path segment is one of
1349/// the three canonical layer dirs and at least one `/` separator follows. A
1350/// trailing `.md` does not affect this classification.
1351fn target_is_full_path(target: &str) -> bool {
1352    let target = target.trim();
1353    match target.split_once('/') {
1354        Some((head, _rest)) => LAYER_DIRS.contains(&head),
1355        None => false,
1356    }
1357}
1358
/// Whether the target, ignoring surrounding whitespace, ends in a literal `.md`
/// (the condition validate reports as `WIKI_LINK_HAS_EXTENSION`). The check is
/// case-sensitive.
fn target_has_md_extension(target: &str) -> bool {
    target.trim().strip_suffix(".md").is_some()
}
1364
/// The 1-based column of `byte_offset` within `line`, counted in characters
/// (Unicode scalar values) rather than bytes.
///
/// # Panics
///
/// Panics if `byte_offset` is past the end of `line` or does not fall on a
/// character boundary.
fn char_column(line: &str, byte_offset: usize) -> u32 {
    let preceding = &line[..byte_offset];
    let chars_before = preceding.chars().count() as u32;
    chars_before + 1
}
1369
1370/// Map a lowercase shape keyword to its [`Shape`].
1371fn shape_from_str(s: &str) -> Option<Shape> {
1372    match s {
1373        "string" => Some(Shape::String),
1374        "int" => Some(Shape::Int),
1375        "bool" => Some(Shape::Bool),
1376        "date" => Some(Shape::Date),
1377        "email" => Some(Shape::Email),
1378        "currency" => Some(Shape::Currency),
1379        "url" => Some(Shape::Url),
1380        _ => None,
1381    }
1382}
1383
/// ATX heading level of `line`: the length of its leading `#` run, or 0 when
/// the line is not a heading.
///
/// A heading may be indented by at most three spaces, its `#` run must be one
/// to six characters long, and the run must be followed by a space, a tab, or
/// the end of the line.
fn heading_level(line: &str) -> u8 {
    let indent = line.bytes().take_while(|&b| b == b' ').count();
    if indent > 3 {
        return 0;
    }
    let body = &line[indent..];
    let run = body.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&run) {
        return 0;
    }
    // The byte right after the run decides whether this is really a heading.
    match body.as_bytes().get(run) {
        None | Some(b' ') | Some(b'\t') => run as u8,
        Some(_) => 0,
    }
}
1404
/// The heading text after the `#` run, trimmed, with any trailing ATX closing
/// `#` sequence removed (`## Title ##` → `Title`).
///
/// `level` is the length of the opening `#` run (as returned by
/// `heading_level`). Following CommonMark, a trailing `#` run only counts as
/// a closing sequence when it is preceded by a space or tab, or when nothing
/// else remains in the heading; hashes attached to the text are kept
/// (`## C#` → `C#`). If `level` does not describe a valid slice of `line`, the
/// text is empty rather than a panic.
fn heading_text(line: &str, level: u8) -> String {
    let indent = line.len() - line.trim_start_matches(' ').len();
    let title = line
        .get(indent + usize::from(level)..)
        .unwrap_or("")
        .trim();
    let without_closer = title.trim_end_matches('#');
    // A real closing sequence is separated from the text by whitespace (or
    // the heading consists of nothing but the closer).
    let has_closing_sequence = without_closer.len() < title.len()
        && (without_closer.is_empty()
            || without_closer.ends_with(|c: char| c == ' ' || c == '\t'));
    if has_closing_sequence {
        without_closer.trim_end().to_string()
    } else {
        title.to_string()
    }
}
1418
/// Detect the opening line of a fenced code block.
///
/// Returns `(fence byte, run length)` when `line` — indented by at most three
/// spaces — starts with a run of three or more backticks or tildes. For a
/// backtick fence the text after the run must not contain another backtick.
/// Any other line yields `None`.
fn opening_fence(line: &str) -> Option<(u8, usize)> {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return None;
    }
    let marker = match unindented.as_bytes().first() {
        Some(&b) if b == b'`' || b == b'~' => b,
        _ => return None,
    };
    let run = unindented.bytes().take_while(|&b| b == marker).count();
    // A backtick fence's info string may not itself contain a backtick.
    let info_allowed = marker != b'`' || !unindented[run..].contains('`');
    if run >= 3 && info_allowed {
        Some((marker, run))
    } else {
        None
    }
}
1440
/// Decide whether `line` closes the fence described by `fence`
/// (`(fence byte, opening run length)`).
///
/// The line may be indented by at most three spaces, must start with a run of
/// the same fence character at least as long as the opening run, and may carry
/// nothing but whitespace after that run.
fn is_closing_fence(line: &str, fence: (u8, usize)) -> bool {
    let (marker, min_run) = (char::from(fence.0), fence.1);
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return false;
    }
    let tail = unindented.trim_start_matches(marker);
    let run = unindented.len() - tail.len();
    run >= min_run && tail.trim().is_empty()
}
1456
/// Prose of a section: the text following the first line (the heading), with
/// surrounding whitespace trimmed. A body consisting of the heading line alone
/// yields an empty string.
fn section_prose(section_body: &str) -> String {
    section_body
        .split_once('\n')
        .map_or_else(String::new, |(_, after_heading)| {
            after_heading.trim().to_owned()
        })
}
1464
/// Collect the bullet lines of a section body.
///
/// The first line (the heading) is skipped. Every remaining line is trimmed
/// of surrounding whitespace and kept only when it then starts with a bullet
/// marker followed by a space (`- `, `* ` or `+ `).
fn bullet_lines(section_body: &str) -> Vec<String> {
    const MARKERS: [&str; 3] = ["- ", "* ", "+ "];
    let mut bullets = Vec::new();
    // `skip(1)` drops the heading line.
    for raw in section_body.lines().skip(1) {
        let line = raw.trim();
        if MARKERS.iter().any(|marker| line.starts_with(*marker)) {
            bullets.push(line.to_owned());
        }
    }
    bullets
}
1476
/// Drop a trailing comment from a bullet's content.
///
/// The content is cut at the earliest occurrence of a space-delimited comment
/// separator — em dash (` — `), double hyphen (` -- `) or en dash (` – `) —
/// and the remaining prefix is returned trimmed. Without a separator the whole
/// content is returned trimmed.
fn strip_bullet_comment(content: &str) -> &str {
    let cut = [" — ", " -- ", " – "]
        .iter()
        .filter_map(|sep| content.find(*sep))
        .min()
        .unwrap_or(content.len());
    content[..cut].trim()
}
1488
/// Content of a bullet line without its marker.
///
/// The line is trimmed, one leading `- `, `* ` or `+ ` marker is removed if
/// present, and the remainder is trimmed again. A line without a marker is
/// returned trimmed and otherwise unchanged.
fn bullet_content(bullet: &str) -> &str {
    let line = bullet.trim();
    ["- ", "* ", "+ "]
        .iter()
        .find_map(|marker| line.strip_prefix(*marker))
        .unwrap_or(line)
        .trim()
}
1498
1499/// Extract a store-relative path from a Frozen-pages bullet. The path may be
1500/// wrapped in backticks and followed by an em-dash comment.
1501fn extract_path_bullet(bullet: &str) -> String {
1502    let content = bullet_content(bullet);
1503    // Prefer a backtick-delimited span if present.
1504    if let Some(start) = content.find('`') {
1505        if let Some(end_rel) = content[start + 1..].find('`') {
1506            return content[start + 1..start + 1 + end_rel].trim().to_string();
1507        }
1508    }
1509    // Otherwise take the text up to a comment separator, stripping quotes.
1510    strip_bullet_comment(content)
1511        .trim_matches('"')
1512        .trim_matches('\'')
1513        .trim()
1514        .to_string()
1515}
1516
1517/// Extract a comma-separated type list from an Ignored-types bullet, stripping
1518/// backticks/quotes and any trailing em-dash comment.
1519fn extract_type_list_bullet(bullet: &str) -> Vec<String> {
1520    let content = strip_bullet_comment(bullet_content(bullet));
1521    content
1522        .split(',')
1523        .map(|t| {
1524            t.trim()
1525                .trim_matches('`')
1526                .trim_matches('"')
1527                .trim_matches('\'')
1528                .trim()
1529                .to_string()
1530        })
1531        .filter(|t| !t.is_empty())
1532        .collect()
1533}
1534
1535#[cfg(test)]
1536mod tests {
1537    use super::*;
1538    use std::path::Path;
1539    use tempfile::tempdir;
1540
1541    // ── Config::frozen_match (the single write-surface policy matcher) ───────
1542
1543    #[test]
1544    fn frozen_match_is_md_insensitive_both_directions() {
1545        // A policy entry stored WITHOUT `.md` (the natural extensionless
1546        // spelling `parse_db_md` keeps verbatim) must still match a `.md`
1547        // write target — the regression every write surface had.
1548        let cfg = Config {
1549            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1550            ..Config::default()
1551        };
1552        assert_eq!(
1553            cfg.frozen_match(Path::new("records/decisions/q1.md")),
1554            Some(PathBuf::from("records/decisions/q1")),
1555            "extensionless policy entry must freeze the .md file"
1556        );
1557        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1558
1559        // The symmetric case: a policy entry WITH `.md` matches a bare target.
1560        let cfg = Config {
1561            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1562            ..Config::default()
1563        };
1564        assert_eq!(
1565            cfg.frozen_match(Path::new("records/decisions/q1")),
1566            Some(PathBuf::from("records/decisions/q1.md")),
1567        );
1568        // And the same-spelling cases still match.
1569        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
1570    }
1571
1572    #[test]
1573    fn frozen_match_drops_leading_dot_slash() {
1574        let cfg = Config {
1575            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
1576            ..Config::default()
1577        };
1578        assert!(cfg.is_frozen(Path::new("./records/decisions/q1.md")));
1579        assert!(cfg.is_frozen(Path::new("./records/decisions/q1")));
1580    }
1581
1582    #[test]
1583    fn frozen_match_returns_none_for_unlisted_and_prefix_paths() {
1584        let cfg = Config {
1585            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
1586            ..Config::default()
1587        };
1588        assert!(cfg
1589            .frozen_match(Path::new("records/decisions/q2.md"))
1590            .is_none());
1591        // A prefix is not a match: `q1` must not freeze `q1-draft`.
1592        assert!(cfg
1593            .frozen_match(Path::new("records/decisions/q1-draft.md"))
1594            .is_none());
1595        assert!(!cfg.is_frozen(Path::new("records/decisions/q11.md")));
1596    }
1597
1598    // ── split_frontmatter ───────────────────────────────────────────────────
1599
1600    #[test]
1601    fn split_frontmatter_separates_yaml_and_verbatim_body() {
1602        let text = "---\ntype: contact\nsummary: x\n---\n# Heading\n\nBody line.\n";
1603        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1604        assert_eq!(p.frontmatter_yaml, "type: contact\nsummary: x\n");
1605        // Body is everything after the closing fence's newline, byte-for-byte.
1606        assert_eq!(p.body, "# Heading\n\nBody line.\n");
1607    }
1608
1609    #[test]
1610    fn split_frontmatter_preserves_body_without_trailing_newline() {
1611        let text = "---\ntype: x\n---\nno trailing newline";
1612        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1613        assert_eq!(p.body, "no trailing newline");
1614    }
1615
1616    #[test]
1617    fn split_frontmatter_empty_body_when_nothing_after_fence() {
1618        let text = "---\ntype: x\n---\n";
1619        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
1620        assert_eq!(p.body, "");
1621    }
1622
1623    #[test]
1624    fn split_frontmatter_missing_opening_fence_errors() {
1625        let text = "# No frontmatter here\ntype: x\n";
1626        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1627        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1628    }
1629
1630    #[test]
1631    fn split_frontmatter_leading_content_before_fence_rejected() {
1632        // The opening fence must be the very first line; a blank line first is
1633        // not allowed.
1634        let text = "\n---\ntype: x\n---\nbody";
1635        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1636        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1637    }
1638
1639    #[test]
1640    fn split_frontmatter_unterminated_block_errors() {
1641        let text = "---\ntype: x\nsummary: y\n";
1642        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
1643        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
1644    }
1645
1646    // ── Frontmatter::parse ───────────────────────────────────────────────────
1647
1648    #[test]
1649    fn parse_populates_typed_fields_and_routes_unknowns_to_extra() {
1650        let yaml = "type: contact\nid: sarah-chen\nsummary: Director of Ops\nstatus: active\ntags: [vip, renewal]\nemail: sarah@northstar.io\nrole: Director";
1651        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1652        assert_eq!(fm.type_.as_deref(), Some("contact"));
1653        assert_eq!(fm.id.as_deref(), Some("sarah-chen"));
1654        assert_eq!(fm.summary.as_deref(), Some("Director of Ops"));
1655        assert_eq!(fm.status.as_deref(), Some("active"));
1656        assert_eq!(fm.tags, vec!["vip".to_string(), "renewal".to_string()]);
1657        // Type-specific fields are NOT promoted to typed slots.
1658        assert!(fm.type_.is_some() && !fm.extra.contains_key("type"));
1659        assert!(!fm.extra.contains_key("tags"));
1660        assert_eq!(
1661            fm.extra.get("email").and_then(|v| v.as_str()),
1662            Some("sarah@northstar.io")
1663        );
1664        assert_eq!(
1665            fm.extra.get("role").and_then(|v| v.as_str()),
1666            Some("Director")
1667        );
1668    }
1669
1670    #[test]
1671    fn parse_reads_rfc3339_timestamps() {
1672        let yaml =
1673            "type: email\ncreated: 2026-05-27T08:00:00-07:00\nupdated: 2026-05-28T09:30:00-07:00";
1674        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
1675        let created = fm.created.expect("created parsed");
1676        // -07:00 offset is 7 * 3600 seconds west.
1677        assert_eq!(created.offset().utc_minus_local(), 7 * 3600);
1678        assert_eq!(created.to_rfc3339(), "2026-05-27T08:00:00-07:00");
1679        assert!(fm.updated.is_some());
1680    }
1681
1682    #[test]
1683    fn parse_rejects_non_rfc3339_timestamp() {
1684        // A date-only value is not a full RFC3339 timestamp; created/updated
1685        // require the full form.
1686        let yaml = "type: email\ncreated: 2026-05-27";
1687        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1688        match err {
1689            ParseError::BadTimestamp { key, value, .. } => {
1690                assert_eq!(key, "created");
1691                assert_eq!(value, "2026-05-27");
1692            }
1693            other => panic!("expected BadTimestamp, got {other:?}"),
1694        }
1695    }
1696
1697    #[test]
1698    fn parse_malformed_yaml_errors() {
1699        // Unclosed flow mapping is invalid YAML.
1700        let yaml = "type: contact\n  bad: : :\n- nope";
1701        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
1702        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1703    }
1704
1705    #[test]
1706    fn frontmatter_with_yaml_tag_on_mapping_does_not_panic() {
1707        // Regression: a YAML tag on the top-level mapping made the old
1708        // `expect_err` path PANIC, because a tagged mapping deserializes to a
1709        // `Mapping` just fine. It must now be handled — accepted as the inner
1710        // mapping, never a panic.
1711        let fm = Frontmatter::parse("!mytag\ntype: contact\nsummary: hi\n", Path::new("x.md"))
1712            .expect("tagged-mapping frontmatter must parse, not panic");
1713        assert_eq!(fm.type_.as_deref(), Some("contact"));
1714        // A genuine scalar/sequence top level is still malformed (and still
1715        // doesn't panic).
1716        assert!(Frontmatter::parse("- a\n- b\n", Path::new("x.md")).is_err());
1717    }
1718
1719    #[test]
1720    fn parse_empty_block_is_empty_frontmatter() {
1721        let fm = Frontmatter::parse("", Path::new("f.md")).unwrap();
1722        assert_eq!(fm, Frontmatter::default());
1723    }
1724
1725    #[test]
1726    fn parse_scalar_top_level_is_malformed() {
1727        // A bare scalar at the top level is not a frontmatter mapping.
1728        let err = Frontmatter::parse("just a string", Path::new("f.md")).unwrap_err();
1729        assert!(matches!(err, ParseError::MalformedYaml { .. }));
1730    }
1731
1732    // ── to_yaml canonical order ──────────────────────────────────────────────
1733
1734    #[test]
1735    fn to_yaml_emits_canonical_key_order() {
1736        let mut fm = Frontmatter {
1737            type_: Some("contact".into()),
1738            id: Some("sarah-chen".into()),
1739            summary: Some("Director of Ops".into()),
1740            status: Some("active".into()),
1741            tags: vec!["vip".into()],
1742            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
1743            updated: Some(DateTime::parse_from_rfc3339("2026-05-28T09:30:00-07:00").unwrap()),
1744            ..Default::default()
1745        };
1746        // Two type-specific fields, inserted in NON-alphabetical order to prove
1747        // the writer sorts them (BTreeMap) between the universal head and tail.
1748        fm.extra
1749            .insert("role".into(), Value::String("Director".into()));
1750        fm.extra.insert(
1751            "company".into(),
1752            Value::String("[[records/companies/northstar]]".into()),
1753        );
1754
1755        let yaml = fm.to_yaml();
1756        let keys: Vec<&str> = yaml
1757            .lines()
1758            .filter(|l| !l.starts_with(['-', ' ']) && l.contains(':'))
1759            .map(|l| l.split(':').next().unwrap())
1760            .collect();
1761        assert_eq!(
1762            keys,
1763            vec![
1764                "type", "id", "created", "updated", "summary", // universal head
1765                "company", "role",   // type-specific, sorted
1766                "status", // universal tail
1767                "tags",
1768            ],
1769            "canonical order violated; got:\n{yaml}"
1770        );
1771        // Timestamps round-trip as RFC3339 strings (YAML may quote them).
1772        assert!(
1773            yaml.contains("2026-05-27T08:00:00-07:00"),
1774            "created timestamp missing; got:\n{yaml}"
1775        );
1776        // The value re-parses to the same instant regardless of quoting.
1777        let reparsed = Frontmatter::parse(&yaml, Path::new("rt.md")).unwrap();
1778        assert_eq!(reparsed.created, fm.created);
1779        assert_eq!(reparsed.updated, fm.updated);
1780    }
1781
1782    #[test]
1783    fn to_yaml_omits_absent_optional_fields() {
1784        let fm = Frontmatter {
1785            type_: Some("note".into()),
1786            ..Default::default()
1787        };
1788        let yaml = fm.to_yaml();
1789        assert!(yaml.contains("type: note"));
1790        assert!(!yaml.contains("status"));
1791        assert!(!yaml.contains("tags"));
1792        assert!(!yaml.contains("summary"));
1793    }
1794
1795    #[test]
1796    fn to_yaml_preserves_unquoted_scalar_wiki_link_round_trip() {
1797        // Regression (PRIMARY): the SPEC-canonical scalar wiki-link is the
1798        // *unquoted* inline `company: [[records/companies/northstar]]`
1799        // (SPEC § Linking, the worked `contact` example). YAML parses it to the
1800        // nested `Seq[Seq[String]]` shape and `parse` stores that verbatim in
1801        // `extra`. Before the fix, `to_yaml` re-emitted it block-style as
1802        //     company:
1803        //     - - records/companies/northstar
1804        // — the `[[ ]]` brackets GONE — so a no-op re-emit (`dbmd format`, and
1805        // any `fm set` / `link` write) silently destroyed the link.
1806        let yaml = "type: contact\ncompany: [[records/companies/northstar]]";
1807        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
1808        // Sanity: it really parsed as the nested sequence, not a string.
1809        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
1810
1811        let out = fm.to_yaml();
1812        // The link must survive as a quoted inline scalar — brackets intact, and
1813        // never the bracket-less block sequence `- - records/...`.
1814        assert!(
1815            out.contains("[[records/companies/northstar]]"),
1816            "canonical writer dropped the wiki-link brackets; got:\n{out}"
1817        );
1818        assert!(
1819            !out.contains("- - "),
1820            "canonical writer emitted a nested block sequence (link corrupted); got:\n{out}"
1821        );
1822
1823        // And it round-trips: re-parsing the emitted YAML still surfaces exactly
1824        // one link with the right target (the edge graph/backlinks rely on).
1825        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
1826        let fields = reparsed.link_fields();
1827        let links: Vec<(&str, &str, Option<&str>)> = fields
1828            .iter()
1829            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
1830            .collect();
1831        assert_eq!(
1832            links,
1833            vec![("company", "records/companies/northstar", None)]
1834        );
1835
1836        // A second re-emit is a fixed point — no progressive corruption across
1837        // repeated curator-loop writes.
1838        assert_eq!(
1839            reparsed.to_yaml(),
1840            out,
1841            "to_yaml is not idempotent on links"
1842        );
1843    }
1844
1845    #[test]
1846    fn to_yaml_preserves_unquoted_scalar_link_with_display() {
1847        // The `|display` segment must survive the unquoted-inline round-trip too.
1848        let yaml = "type: contact\ncompany: [[records/companies/northstar|Northstar]]";
1849        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
1850        let out = fm.to_yaml();
1851        assert!(
1852            out.contains("[[records/companies/northstar|Northstar]]"),
1853            "display segment lost on round-trip; got:\n{out}"
1854        );
1855        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
1856        let f = reparsed.link_fields();
1857        assert_eq!(f.len(), 1);
1858        assert_eq!(f[0].1.target, "records/companies/northstar");
1859        assert_eq!(f[0].1.display.as_deref(), Some("Northstar"));
1860    }
1861
1862    #[test]
1863    fn to_yaml_does_not_mangle_link_list_or_plain_nested_sequence() {
1864        // A genuine quoted block list of links round-trips as a clean string
1865        // list — never collapsed to a scalar — and a plain nested sequence that
1866        // is NOT a wiki-link is left exactly as written (no false conversion).
1867        let yaml = "type: meeting\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nmatrix:\n  - - 1\n    - 2";
1868        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
1869        let out = fm.to_yaml();
1870
1871        // Both attendee links survive as quoted strings.
1872        assert!(out.contains("[[records/contacts/elena]]"), "got:\n{out}");
1873        assert!(out.contains("[[records/contacts/sarah]]"), "got:\n{out}");
1874
1875        let reparsed = Frontmatter::parse(&out, Path::new("m.md")).unwrap();
1876        let fields = reparsed.link_fields();
1877        let attendees: Vec<&str> = fields
1878            .iter()
1879            .filter(|(k, _)| k == "attendees")
1880            .map(|(_, l)| l.target.as_str())
1881            .collect();
1882        assert_eq!(
1883            attendees,
1884            vec!["records/contacts/elena", "records/contacts/sarah"]
1885        );
1886        // The non-link nested sequence is preserved verbatim, not touched.
1887        assert_eq!(reparsed.extra.get("matrix"), fm.extra.get("matrix"));
1888    }
1889
1890    // ── read_file / write_file round-trip ────────────────────────────────────
1891
1892    #[test]
1893    fn write_then_read_roundtrips_and_preserves_body_verbatim() {
1894        let dir = tempdir().unwrap();
1895        let path = dir.path().join("sources/emails/x.md");
1896        let body = "# Subject\n\nHello,\n\nSee [[records/contacts/sarah-chen]].\n";
1897        let mut fm = Frontmatter {
1898            type_: Some("email".into()),
1899            summary: Some("renewal note".into()),
1900            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
1901            ..Default::default()
1902        };
1903        fm.extra
1904            .insert("from".into(), Value::String("elena@northstar.io".into()));
1905
1906        write_file(&path, &fm, body).unwrap();
1907
1908        let (read_fm, read_body) = read_file(&path).unwrap();
1909        assert_eq!(read_body, body, "body must be preserved byte-for-byte");
1910        assert_eq!(read_fm.type_.as_deref(), Some("email"));
1911        assert_eq!(read_fm.summary.as_deref(), Some("renewal note"));
1912        assert_eq!(
1913            read_fm.extra.get("from").and_then(|v| v.as_str()),
1914            Some("elena@northstar.io")
1915        );
1916        // The on-disk file starts with a fence and ends with the verbatim body.
1917        let raw = std::fs::read_to_string(&path).unwrap();
1918        assert!(raw.starts_with("---\n"));
1919        assert!(raw.ends_with(body));
1920    }
1921
1922    #[test]
1923    fn roundtrip_modify_summary_then_write_changes_only_summary() {
1924        let dir = tempdir().unwrap();
1925        let path = dir.path().join("records/contacts/sarah.md");
1926        let body = "Long-form operator notes about Sarah.\n";
1927        let fm = Frontmatter {
1928            type_: Some("contact".into()),
1929            summary: Some("old summary".into()),
1930            ..Default::default()
1931        };
1932        write_file(&path, &fm, body).unwrap();
1933
1934        // Read → modify summary → write back.
1935        let (mut fm2, body2) = read_file(&path).unwrap();
1936        fm2.summary = Some("new summary".into());
1937        write_file(&path, &fm2, &body2).unwrap();
1938
1939        let (fm3, body3) = read_file(&path).unwrap();
1940        assert_eq!(fm3.summary.as_deref(), Some("new summary"));
1941        assert_eq!(fm3.type_.as_deref(), Some("contact"));
1942        assert_eq!(body3, body, "body unchanged across the round-trip");
1943    }
1944
1945    #[test]
1946    fn roundtrip_preserves_handwritten_unquoted_scalar_wiki_link_on_disk() {
1947        // End-to-end analog of `dbmd format` on the verbatim SPEC worked example:
1948        // a hand-written file carrying the canonical UNQUOTED scalar link
1949        // `company: [[records/companies/northstar]]`, read from disk then written
1950        // back unchanged. Before the fix this no-op re-emit rewrote the on-disk
1951        // value to the bracket-less block sequence `company:\n- - records/...`,
1952        // and every reader (validate/graph/backlinks) then lost the edge.
1953        let dir = tempdir().unwrap();
1954        let path = dir.path().join("records/contacts/sarah-chen.md");
1955        let file = "---\ntype: contact\nid: sarah-chen\nsummary: Director of Ops\ncompany: [[records/companies/northstar]]\n---\n# Sarah Chen\n\nNotes.\n";
1956        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
1957        std::fs::write(&path, file).unwrap();
1958
1959        // Read → write back unchanged (the canonical no-op re-emit).
1960        let (fm, body) = read_file(&path).unwrap();
1961        write_file(&path, &fm, &body).unwrap();
1962
1963        // On-disk bytes still carry the bracketed link, never `- - records/...`.
1964        let raw = std::fs::read_to_string(&path).unwrap();
1965        assert!(
1966            raw.contains("[[records/companies/northstar]]"),
1967            "on-disk wiki-link brackets were destroyed; got:\n{raw}"
1968        );
1969        assert!(
1970            !raw.contains("- - "),
1971            "on-disk value became a nested block sequence; got:\n{raw}"
1972        );
1973
1974        // And the edge is still readable after the round-trip.
1975        let (fm2, _) = read_file(&path).unwrap();
1976        let fields = fm2.link_fields();
1977        let links: Vec<(&str, &str)> = fields
1978            .iter()
1979            .map(|(k, l)| (k.as_str(), l.target.as_str()))
1980            .collect();
1981        assert_eq!(links, vec![("company", "records/companies/northstar")]);
1982    }
1983
1984    #[test]
1985    fn write_file_does_not_leave_temp_files_behind() {
1986        let dir = tempdir().unwrap();
1987        let path = dir.path().join("records/x.md");
1988        let fm = Frontmatter {
1989            type_: Some("note".into()),
1990            ..Default::default()
1991        };
1992        write_file(&path, &fm, "body\n").unwrap();
1993        // The directory should contain only the target file, no `.x.md.tmp.*`.
1994        let entries: Vec<String> = std::fs::read_dir(path.parent().unwrap())
1995            .unwrap()
1996            .map(|e| e.unwrap().file_name().to_string_lossy().into_owned())
1997            .collect();
1998        assert_eq!(entries, vec!["x.md".to_string()]);
1999    }
2000
2001    // ── is_content_file ──────────────────────────────────────────────────────
2002
2003    #[test]
2004    fn is_content_file_recognizes_layers_and_excludes_meta() {
2005        assert!(Frontmatter::is_content_file(Path::new(
2006            "sources/emails/2026-05-22.md"
2007        )));
2008        assert!(Frontmatter::is_content_file(Path::new(
2009            "records/contacts/sarah-chen.md"
2010        )));
2011        assert!(Frontmatter::is_content_file(Path::new(
2012            "wiki/people/sarah-chen.md"
2013        )));
2014        // Absolute paths under a layer are still content.
2015        assert!(Frontmatter::is_content_file(Path::new(
2016            "/home/db/records/companies/northstar.md"
2017        )));
2018        // index.md at any level is meta.
2019        assert!(!Frontmatter::is_content_file(Path::new(
2020            "records/contacts/index.md"
2021        )));
2022        assert!(!Frontmatter::is_content_file(Path::new("index.md")));
2023        // Root meta files.
2024        assert!(!Frontmatter::is_content_file(Path::new("DB.md")));
2025        assert!(!Frontmatter::is_content_file(Path::new("log.md")));
2026    }
2027
2028    // ── effective_id ─────────────────────────────────────────────────────────
2029
2030    #[test]
2031    fn effective_id_prefers_explicit_then_derives_from_path() {
2032        let with_id = Frontmatter {
2033            id: Some("explicit-id".into()),
2034            ..Default::default()
2035        };
2036        assert_eq!(
2037            with_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2038            "explicit-id"
2039        );
2040        let no_id = Frontmatter::default();
2041        assert_eq!(
2042            no_id.effective_id(Path::new("wiki/people/sarah-chen.md")),
2043            "sarah-chen"
2044        );
2045    }
2046
2047    // ── get / set ────────────────────────────────────────────────────────────
2048
2049    #[test]
2050    fn set_routes_universal_and_custom_keys() {
2051        let mut fm = Frontmatter::default();
2052        fm.set("type", "contact").unwrap();
2053        fm.set("summary", "hi").unwrap();
2054        fm.set("company", "[[records/companies/northstar]]")
2055            .unwrap();
2056        assert_eq!(fm.type_.as_deref(), Some("contact"));
2057        assert_eq!(fm.summary.as_deref(), Some("hi"));
2058        // Custom key landed in extra, not a typed slot.
2059        assert_eq!(
2060            fm.extra.get("company").and_then(|v| v.as_str()),
2061            Some("[[records/companies/northstar]]")
2062        );
2063        // get reads from both typed fields and extra.
2064        assert_eq!(
2065            fm.get("type").and_then(|v| v.as_str().map(String::from)),
2066            Some("contact".into())
2067        );
2068        assert_eq!(
2069            fm.get("company").and_then(|v| v.as_str().map(String::from)),
2070            Some("[[records/companies/northstar]]".into())
2071        );
2072        assert!(fm.get("nonexistent").is_none());
2073    }
2074
2075    #[test]
2076    fn set_timestamp_validates_rfc3339() {
2077        let mut fm = Frontmatter::default();
2078        fm.set("created", "2026-05-27T08:00:00-07:00").unwrap();
2079        assert!(fm.created.is_some());
2080        let err = fm.set("updated", "not-a-date").unwrap_err();
2081        assert!(matches!(err, ParseError::BadTimestamp { .. }));
2082    }
2083
2084    // ── extract_wiki_links ───────────────────────────────────────────────────
2085
2086    #[test]
2087    fn extract_wiki_links_flags_full_path_short_form_and_extension() {
2088        let body = "See [[records/contacts/sarah-chen]] and [[sarah-chen]].\nAlso [[wiki/people/sarah-chen.md|Sarah]].\n";
2089        let links = extract_wiki_links(body, Path::new("doc.md"));
2090        assert_eq!(links.len(), 3);
2091
2092        // Full path, no extension, no display.
2093        assert_eq!(links[0].target, "records/contacts/sarah-chen");
2094        assert!(links[0].is_full_path);
2095        assert!(!links[0].has_md_extension);
2096        assert_eq!(links[0].display, None);
2097        assert_eq!(links[0].location.1, 1, "first link on line 1");
2098
2099        // Short form: not a full path.
2100        assert_eq!(links[1].target, "sarah-chen");
2101        assert!(!links[1].is_full_path, "bare target is short-form");
2102
2103        // Full path WITH .md extension and a display override on line 2.
2104        assert_eq!(links[2].target, "wiki/people/sarah-chen.md");
2105        assert!(links[2].is_full_path);
2106        assert!(links[2].has_md_extension);
2107        assert_eq!(links[2].display.as_deref(), Some("Sarah"));
2108        assert_eq!(links[2].location.1, 2);
2109    }
2110
2111    #[test]
2112    fn extract_wiki_links_reports_1_based_column_counting_chars() {
2113        // A multi-byte prefix (é is 2 bytes) must not skew the char column.
2114        let body = "café [[records/x/y]]";
2115        let links = extract_wiki_links(body, Path::new("d.md"));
2116        assert_eq!(links.len(), 1);
2117        // "café " is 5 chars, so the `[[` starts at char column 6 (1-based).
2118        assert_eq!(links[0].location.2, 6);
2119    }
2120
2121    #[test]
2122    fn extract_wiki_links_ignores_a_lone_path_without_brackets() {
2123        let links = extract_wiki_links(
2124            "records/contacts/sarah-chen is not a link",
2125            Path::new("d.md"),
2126        );
2127        assert!(links.is_empty());
2128    }
2129
2130    // ── extract_markdown_links ───────────────────────────────────────────────
2131
2132    #[test]
2133    fn extract_markdown_links_captures_external_and_not_wiki_links() {
2134        let body =
2135            "See [the thread](https://x.com/a) and [[records/contacts/sarah-chen]] internally.\n";
2136        let md = extract_markdown_links(body, Path::new("d.md"));
2137        assert_eq!(
2138            md.len(),
2139            1,
2140            "wiki-link must not be captured as a markdown link"
2141        );
2142        assert_eq!(md[0].text, "the thread");
2143        assert_eq!(md[0].url, "https://x.com/a");
2144        assert_eq!(md[0].location.1, 1);
2145
2146        // And the wiki-link extractor must not pick up the markdown link.
2147        let wl = extract_wiki_links(body, Path::new("d.md"));
2148        assert_eq!(wl.len(), 1);
2149        assert_eq!(wl[0].target, "records/contacts/sarah-chen");
2150    }
2151
2152    // ── link_fields ──────────────────────────────────────────────────────────
2153
2154    #[test]
2155    fn link_fields_extracts_scalar_list_and_summary_links() {
2156        // The canonical list form quotes each item so YAML parses it as clean
2157        // strings; a scalar field may be quoted OR written in the canonical
2158        // unquoted inline form `company: [[x]]` (SPEC § Linking).
2159        let yaml = "type: meeting\nsummary: with [[records/contacts/elena]]\ncompany: \"[[records/companies/northstar]]\"\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nnotes: just plain text";
2160        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2161        // Sanity: company really did parse as a scalar string here.
2162        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_some());
2163        let fields = fm.link_fields();
2164
2165        // company (scalar) once, with the right target.
2166        let company: Vec<&str> = fields
2167            .iter()
2168            .filter(|(k, _)| k == "company")
2169            .map(|(_, l)| l.target.as_str())
2170            .collect();
2171        assert_eq!(company, vec!["records/companies/northstar"]);
2172        // attendees (block list) twice.
2173        let attendees: Vec<&str> = fields
2174            .iter()
2175            .filter(|(k, _)| k == "attendees")
2176            .map(|(_, l)| l.target.as_str())
2177            .collect();
2178        assert_eq!(
2179            attendees,
2180            vec!["records/contacts/elena", "records/contacts/sarah"]
2181        );
2182        // summary link surfaced.
2183        assert_eq!(fields.iter().filter(|(k, _)| k == "summary").count(), 1);
2184        // Plain-text field is not a link.
2185        assert_eq!(fields.iter().filter(|(k, _)| k == "notes").count(), 0);
2186    }
2187
2188    #[test]
2189    fn link_fields_surfaces_canonical_unquoted_scalar_link() {
2190        // Regression: the canonical scalar wiki-link form is the *unquoted*
2191        // inline `company: [[records/companies/northstar]]` (SPEC § Linking).
2192        // YAML parses `[[x]]` as a flow-list-in-a-list (`Seq[Seq[String]]`), so
2193        // a naive `as_str()`-only walk drops it. link_fields() must still
2194        // surface exactly one link with the correct target.
2195        let yaml = "type: meeting\ncompany: [[records/companies/northstar]]";
2196        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2197        // Sanity: it really did parse as the nested sequence form, NOT a string.
2198        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
2199
2200        let fields = fm.link_fields();
2201        let links: Vec<(&str, &str, Option<&str>)> = fields
2202            .iter()
2203            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
2204            .collect();
2205        assert_eq!(
2206            links,
2207            vec![("company", "records/companies/northstar", None)]
2208        );
2209
2210        // The `|display` segment survives the unquoted inline form too.
2211        let fm2 = Frontmatter::parse(
2212            "type: meeting\ncompany: [[records/companies/northstar|Northstar]]",
2213            Path::new("m.md"),
2214        )
2215        .unwrap();
2216        let f2 = fm2.link_fields();
2217        assert_eq!(f2.len(), 1);
2218        assert_eq!(f2[0].0, "company");
2219        assert_eq!(f2[0].1.target, "records/companies/northstar");
2220        assert_eq!(f2[0].1.display.as_deref(), Some("Northstar"));
2221    }
2222
2223    #[test]
2224    fn link_fields_ignores_plain_one_item_flow_list() {
2225        // A plain one-item flow list `aliases: [foo]` parses to `Seq[String]`
2226        // — one nesting level shallower than an unquoted `[[foo]]` — and must
2227        // NOT be mistaken for a wiki-link.
2228        let yaml = "type: contact\naliases: [foo]";
2229        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
2230        assert_eq!(fm.link_fields(), Vec::new());
2231    }
2232
2233    // ── detect_flow_form_link_lists ──────────────────────────────────────────
2234
2235    #[test]
2236    fn detect_flow_form_flags_list_misencodings_not_scalars() {
2237        // The flow-form list mis-encoding (triple-nested) IS flagged; a scalar
2238        // inline wiki-link (double-nested) is NOT.
2239        let bad = "attendees: [[[records/x]], [[records/y]]]\nscalar_inline: [[records/z]]";
2240        let flagged = detect_flow_form_link_lists(bad);
2241        assert_eq!(flagged, vec!["attendees".to_string()]);
2242
2243        // An UNquoted block list is also a mis-encoding (parses triple-nested).
2244        let unquoted_block = "attendees:\n  - [[records/x]]\n  - [[records/y]]";
2245        assert_eq!(
2246            detect_flow_form_link_lists(unquoted_block),
2247            vec!["attendees".to_string()]
2248        );
2249
2250        // The canonical QUOTED block form parses to clean strings — NOT flagged.
2251        let good = "attendees:\n  - \"[[records/x]]\"\n  - \"[[records/y]]\"";
2252        assert!(detect_flow_form_link_lists(good).is_empty());
2253
2254        // A plain scalar list of strings is not flagged.
2255        let plain = "tags: [a, b, c]";
2256        assert!(detect_flow_form_link_lists(plain).is_empty());
2257    }
2258
2259    // ── extract_sections ─────────────────────────────────────────────────────
2260
2261    #[test]
2262    fn extract_sections_levels_nesting_and_boundaries() {
2263        let body = "intro text\n## First\nalpha\n### Sub\nbeta\n## Second\ngamma\n";
2264        let secs = extract_sections(body);
2265        let headings: Vec<(&str, u8)> =
2266            secs.iter().map(|s| (s.heading.as_str(), s.level)).collect();
2267        assert_eq!(headings, vec![("First", 2), ("Sub", 3), ("Second", 2)]);
2268
2269        // "First" (H2) body extends through its H3 child, stopping at "Second".
2270        let first = &secs[0];
2271        assert!(first.body.contains("alpha"));
2272        assert!(first.body.contains("### Sub"));
2273        assert!(first.body.contains("beta"));
2274        assert!(!first.body.contains("Second"));
2275
2276        // "Sub" (H3) stops at the next equal-or-shallower heading ("Second").
2277        let sub = &secs[1];
2278        assert!(sub.body.contains("beta"));
2279        assert!(!sub.body.contains("gamma"));
2280
2281        // 1-based line numbers within the body.
2282        assert_eq!(first.line, 2);
2283        assert_eq!(secs[2].line, 6);
2284    }
2285
2286    #[test]
2287    fn extract_sections_ignores_headings_in_fenced_code() {
2288        let body = "## Real\n```\n## Fake heading in code\n```\nafter\n";
2289        let secs = extract_sections(body);
2290        assert_eq!(secs.len(), 1);
2291        assert_eq!(secs[0].heading, "Real");
2292        // The fenced "## Fake" is part of Real's body, not its own section.
2293        assert!(secs[0].body.contains("## Fake heading in code"));
2294    }
2295
2296    // ── parse_field_spec ─────────────────────────────────────────────────────
2297
2298    #[test]
2299    fn parse_field_spec_required_and_shape() {
2300        let f = parse_field_spec("- email (required, email)");
2301        assert_eq!(f.name, "email");
2302        assert!(f.required);
2303        assert_eq!(f.shape, Some(Shape::Email));
2304        assert!(f.unknown_modifiers.is_empty());
2305    }
2306
2307    #[test]
2308    fn parse_field_spec_link_prefix_strips_trailing_slash() {
2309        let f = parse_field_spec("- company (required, link to records/companies/)");
2310        assert!(f.required);
2311        assert_eq!(f.link_prefix, Some(PathBuf::from("records/companies")));
2312        assert_eq!(f.shape, None);
2313    }
2314
2315    #[test]
2316    fn parse_field_spec_default_preserves_case_and_value() {
2317        let f = parse_field_spec("- currency (default USD)");
2318        assert_eq!(f.name, "currency");
2319        assert_eq!(f.default, Some(Value::String("USD".into())));
2320    }
2321
2322    #[test]
2323    fn parse_field_spec_enum_captures_comma_list_as_last_modifier() {
2324        let f = parse_field_spec("- status (required, enum: open, closed, pending)");
2325        assert!(f.required);
2326        assert_eq!(
2327            f.enum_values,
2328            Some(vec![
2329                "open".to_string(),
2330                "closed".to_string(),
2331                "pending".to_string()
2332            ])
2333        );
2334    }
2335
2336    #[test]
2337    fn parse_field_spec_bare_enum_keyword_is_not_itself_a_value() {
2338        // `enum` with no colon: the values are the remaining tokens; the keyword
2339        // itself must NOT leak in as an allowed value.
2340        let f = parse_field_spec("- status (required, enum, open, closed)");
2341        assert!(f.required);
2342        assert_eq!(
2343            f.enum_values,
2344            Some(vec!["open".to_string(), "closed".to_string()])
2345        );
2346    }
2347
2348    #[test]
2349    fn parse_field_spec_unknown_modifier_is_captured_not_errored() {
2350        let f = parse_field_spec("- weird (required, frobnicate, string)");
2351        assert!(f.required);
2352        assert_eq!(f.shape, Some(Shape::String));
2353        assert_eq!(f.unknown_modifiers, vec!["frobnicate".to_string()]);
2354    }
2355
2356    #[test]
2357    fn parse_field_spec_no_parens_is_freeform_optional() {
2358        let f = parse_field_spec("- nickname");
2359        assert_eq!(f.name, "nickname");
2360        assert!(!f.required);
2361        assert_eq!(f.shape, None);
2362        assert!(f.link_prefix.is_none());
2363        assert!(f.enum_values.is_none());
2364        assert!(f.unknown_modifiers.is_empty());
2365    }
2366
2367    // ── parse_schema_bullet (directives) ─────────────────────────────────────
2368
2369    #[test]
2370    fn schema_bullet_unique_single_field() {
2371        match parse_schema_bullet("- unique: email") {
2372            SchemaBullet::Unique(fields) => assert_eq!(fields, vec!["email".to_string()]),
2373            other => panic!("expected Unique, got {other:?}"),
2374        }
2375    }
2376
2377    #[test]
2378    fn schema_bullet_unique_compound_trims_and_splits() {
2379        match parse_schema_bullet("- unique: date, amount , vendor") {
2380            SchemaBullet::Unique(fields) => assert_eq!(
2381                fields,
2382                vec![
2383                    "date".to_string(),
2384                    "amount".to_string(),
2385                    "vendor".to_string()
2386                ]
2387            ),
2388            other => panic!("expected Unique, got {other:?}"),
2389        }
2390    }
2391
2392    #[test]
2393    fn schema_bullet_summary_template_keeps_braces_and_inner_colons() {
2394        match parse_schema_bullet("- summary_template: {role} at {company} (x: y)") {
2395            SchemaBullet::SummaryTemplate(t) => assert_eq!(t, "{role} at {company} (x: y)"),
2396            other => panic!("expected SummaryTemplate, got {other:?}"),
2397        }
2398    }
2399
2400    #[test]
2401    fn schema_bullet_field_with_enum_modifier_is_not_a_directive() {
2402        // A field whose modifiers contain a colon (`enum:`) parses as a field, not
2403        // a directive — its head has a `(` before any `:`.
2404        match parse_schema_bullet("- status (enum: open, closed)") {
2405            SchemaBullet::Field(f) => {
2406                assert_eq!(f.name, "status");
2407                assert_eq!(
2408                    f.enum_values,
2409                    Some(vec!["open".to_string(), "closed".to_string()])
2410                );
2411            }
2412            other => panic!("expected Field, got {other:?}"),
2413        }
2414    }
2415
2416    #[test]
2417    fn parse_db_md_schema_captures_unique_and_summary_template() {
2418        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### contact\n- email (required, email)\n- unique: email\n- summary_template: {role} at {company}\n";
2419        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
2420        let s = config.schemas.get("contact").expect("contact schema");
2421        assert_eq!(s.fields.len(), 1, "directives are not parsed as fields");
2422        assert_eq!(s.unique_keys, vec![vec!["email".to_string()]]);
2423        assert_eq!(s.summary_template.as_deref(), Some("{role} at {company}"));
2424    }
2425
2426    #[test]
2427    fn schema_bullet_shard_directive_parses_values() {
2428        assert!(matches!(
2429            parse_schema_bullet("- shard: by-date"),
2430            SchemaBullet::Shard(Some(true))
2431        ));
2432        assert!(matches!(
2433            parse_schema_bullet("- shard: flat"),
2434            SchemaBullet::Shard(Some(false))
2435        ));
2436        // An unrecognized value is ignored (None), like an unknown modifier.
2437        assert!(matches!(
2438            parse_schema_bullet("- shard: weekly"),
2439            SchemaBullet::Shard(None)
2440        ));
2441        // A field whose name has a `(` before any `:` is still a field — the same
2442        // guard that keeps `- status (enum: a, b)` a field, not a directive.
2443        assert!(matches!(
2444            parse_schema_bullet("- shardiness (string)"),
2445            SchemaBullet::Field(_)
2446        ));
2447    }
2448
2449    #[test]
2450    fn parse_db_md_schema_captures_shard_directive() {
2451        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### shipment\n- carrier (string)\n- shard: by-date\n\n### contact\n- shard: flat\n";
2452        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
2453        let shipment = config.schemas.get("shipment").expect("shipment schema");
2454        assert_eq!(shipment.shard, Some(true));
2455        assert_eq!(
2456            shipment.fields.len(),
2457            1,
2458            "`shard:` is a directive, not a field"
2459        );
2460        assert_eq!(config.schemas.get("contact").unwrap().shard, Some(false));
2461    }
2462
2463    // ── parse_db_md ──────────────────────────────────────────────────────────
2464
2465    const CANONICAL_DB_MD: &str = "---\ntype: db-md\nscope: company\nowner: Sarah Chen\n---\n\n# Acme operations knowledge base\n\nCompany-scale institutional memory for Acme.\n\n## Agent instructions\n\nPrioritize creating `contact` records from new-sender emails. Use British English.\n\n## Policies\n\n### Frozen pages\n- `records/decisions/2026-q1-strategy.md` — finalized, do not modify.\n- `wiki/synthesis/2026-annual-plan.md` — signed-off plan.\n\n### Ignored types\n- `test`, `temp` — read but never synthesize.\n\n## Schemas\n\n### contact\n- name (required)\n- email (required, email)\n- company (required, link to records/companies/)\n- role (string)\n\n### expense\n- date (required, date)\n- amount (required)\n- currency (default USD)\n";
2466
2467    #[test]
2468    fn parse_db_md_extracts_all_canonical_sections() {
2469        let config = parse_db_md(CANONICAL_DB_MD, Path::new("DB.md")).unwrap();
2470
2471        // Agent instructions: free-form prose, heading line stripped.
2472        let ai = config
2473            .agent_instructions
2474            .expect("agent instructions present");
2475        assert!(ai.starts_with("Prioritize creating"));
2476        assert!(!ai.contains("## Agent instructions"));
2477
2478        // Frozen pages: paths extracted from backticked bullets, comments dropped.
2479        assert_eq!(
2480            config.frozen_pages,
2481            vec![
2482                PathBuf::from("records/decisions/2026-q1-strategy.md"),
2483                PathBuf::from("wiki/synthesis/2026-annual-plan.md"),
2484            ]
2485        );
2486
2487        // Ignored types: comma list, backticks/comment stripped.
2488        assert_eq!(
2489            config.ignored_types,
2490            vec!["test".to_string(), "temp".to_string()]
2491        );
2492
2493        // Schemas: two types, each with its fields in source order.
2494        assert_eq!(config.schemas.len(), 2);
2495        let contact = config.schemas.get("contact").expect("contact schema");
2496        let names: Vec<&str> = contact.fields.iter().map(|f| f.name.as_str()).collect();
2497        assert_eq!(names, vec!["name", "email", "company", "role"]);
2498        assert!(contact.fields[0].required); // name
2499        assert_eq!(contact.fields[1].shape, Some(Shape::Email)); // email
2500        assert_eq!(
2501            contact.fields[2].link_prefix,
2502            Some(PathBuf::from("records/companies"))
2503        ); // company
2504
2505        let expense = config.schemas.get("expense").expect("expense schema");
2506        let cur = expense
2507            .fields
2508            .iter()
2509            .find(|f| f.name == "currency")
2510            .unwrap();
2511        assert_eq!(cur.default, Some(Value::String("USD".into())));
2512    }
2513
2514    #[test]
2515    fn parse_db_md_handles_malformed_and_unknown_modifiers() {
2516        // corpus-b shape: a `## Schemas` section with a malformed bullet, an
2517        // unknown modifier, and bullets that appear with NO `### <type>`
2518        // heading (so they belong to no schema and are dropped).
2519        let text = "---\ntype: db-md\n---\n\n## Schemas\n- orphan (required)\n\n### ticket\n- priority (required, mystery, enum: low, high)\n- broken (\n";
2520        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
2521
2522        // The orphan bullet under `## Schemas` with no `### type` heading is not
2523        // captured as a schema.
2524        assert_eq!(config.schemas.len(), 1);
2525        let ticket = config.schemas.get("ticket").expect("ticket schema");
2526        assert_eq!(ticket.fields.len(), 2);
2527
2528        let priority = &ticket.fields[0];
2529        assert!(priority.required);
2530        assert_eq!(priority.unknown_modifiers, vec!["mystery".to_string()]);
2531        assert_eq!(
2532            priority.enum_values,
2533            Some(vec!["low".to_string(), "high".to_string()])
2534        );
2535
2536        // A bullet with an unclosed paren still yields a usable name.
2537        let broken = &ticket.fields[1];
2538        assert_eq!(broken.name, "broken");
2539    }
2540
2541    #[test]
2542    fn parse_db_md_missing_frontmatter_errors() {
2543        let text = "# No frontmatter\n\n## Agent instructions\nhi\n";
2544        let err = parse_db_md(text, Path::new("DB.md")).unwrap_err();
2545        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
2546    }
2547
2548    #[test]
2549    fn parse_db_md_absent_sections_default_empty() {
2550        let text = "---\ntype: db-md\n---\n\n# Title only\n";
2551        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
2552        assert_eq!(config, Config::default());
2553    }
2554
2555    // ── fm set / --fm list-valued link fields (meeting.attendees & friends) ──
2556
2557    /// `Frontmatter::set` is the value path every write surface (`fm set`,
2558    /// `write --fm`) funnels through. A list-of-wiki-links value (the SPEC's
2559    /// `meeting.attendees` shape) must serialize as a YAML **block sequence** of
2560    /// quoted links — readable back by [`links_in_field_value`] and accepted by
2561    /// `dbmd validate` — never the flow-form scalar string that trips
2562    /// `WIKI_LINK_FLOW_FORM_LIST`. Both the unquoted (`[[[a]], [[b]]]`) and
2563    /// quoted (`["[[a]]", "[[b]]"]`) spellings an agent types must normalize.
2564    #[test]
2565    fn set_list_of_wiki_links_becomes_block_sequence_both_spellings() {
2566        for value in [
2567            "[[[records/contacts/a]], [[records/contacts/b]]]",
2568            r#"["[[records/contacts/a]]", "[[records/contacts/b]]"]"#,
2569        ] {
2570            let mut fm = Frontmatter::default();
2571            fm.set("attendees", value).unwrap();
2572
2573            // Stored as a 2-element sequence of clean quoted links.
2574            let stored = fm.extra.get("attendees").expect("attendees set");
2575            let Value::Sequence(items) = stored else {
2576                panic!("attendees must be a Sequence, got {stored:?} for input {value}");
2577            };
2578            assert_eq!(items.len(), 2, "input {value}");
2579            assert_eq!(items[0], Value::String("[[records/contacts/a]]".into()));
2580            assert_eq!(items[1], Value::String("[[records/contacts/b]]".into()));
2581
2582            // The edge enumerator reads exactly the two links back (no stray
2583            // bracket targets, the flow-form-string symptom).
2584            let links: Vec<_> = links_in_field_value(stored)
2585                .into_iter()
2586                .map(|l| l.target)
2587                .collect();
2588            assert_eq!(
2589                links,
2590                vec!["records/contacts/a", "records/contacts/b"],
2591                "input {value}"
2592            );
2593
2594            // And the canonical writer renders it block-style, not as a scalar.
2595            let yaml = fm.to_yaml();
2596            assert!(
2597                yaml.contains("attendees:\n"),
2598                "expected block list in:\n{yaml}"
2599            );
2600            assert!(
2601                !yaml.contains("attendees: '[["),
2602                "must not be a flow-form scalar string in:\n{yaml}"
2603            );
2604        }
2605    }
2606
2607    /// A *single* inline wiki-link stays a scalar string (renders inline
2608    /// `field: [[x]]`), and a single link must never be widened to a one-item
2609    /// list — preserving the common `contact.company` / `expense.vendor` shape.
2610    #[test]
2611    fn set_single_inline_wiki_link_stays_scalar() {
2612        let mut fm = Frontmatter::default();
2613        fm.set("company", "[[records/companies/tideform]]").unwrap();
2614        assert_eq!(
2615            fm.extra.get("company"),
2616            Some(&Value::String("[[records/companies/tideform]]".into())),
2617        );
2618        // Still recognized as one link.
2619        let links: Vec<_> = links_in_field_value(fm.extra.get("company").unwrap())
2620            .into_iter()
2621            .map(|l| l.target)
2622            .collect();
2623        assert_eq!(links, vec!["records/companies/tideform"]);
2624    }
2625
2626    /// Plain text and a non-link flow list are left as verbatim scalar strings —
2627    /// the list normalization only triggers when every item is a clean wiki-link.
2628    #[test]
2629    fn set_non_link_values_stay_scalar_strings() {
2630        let mut fm = Frontmatter::default();
2631        fm.set("location", "Video call (remote)").unwrap();
2632        assert_eq!(
2633            fm.extra.get("location"),
2634            Some(&Value::String("Video call (remote)".into())),
2635        );
2636
2637        // A flow list whose items are NOT wiki-links must not be reinterpreted as
2638        // a link sequence; it stays the scalar string the agent passed.
2639        fm.set("note", "[draft, wip]").unwrap();
2640        assert_eq!(
2641            fm.extra.get("note"),
2642            Some(&Value::String("[draft, wip]".into()))
2643        );
2644    }
2645}