dbmd_core/
parser.rs

1//! `parser` — read and write db.md markdown files.
2//!
3//! Parses the YAML frontmatter block, the markdown body, wiki-links, standard
4//! markdown links, `##` sections, and the structured sections of the `DB.md`
5//! config file. Also the atomic writer that round-trips a file while
6//! preserving the operator-edited body verbatim and emitting frontmatter in
7//! canonical key order.
8//!
9//! Strict on required fields, lenient on unknowns: any frontmatter key the
10//! spec doesn't recognize is preserved in [`Frontmatter::extra`] as ambient
11//! context and round-tripped untouched.
12
13use std::collections::BTreeMap;
14use std::path::{Path, PathBuf};
15
16use chrono::{DateTime, FixedOffset};
17use serde_norway::{Mapping, Value};
18
/// Folder names of the two canonical layers. A path counts as "content", and
/// a wiki-link target as "full-path", only when it sits under one of these.
const LAYER_DIRS: [&str; 2] = ["sources", "records"];
22
23/// Errors produced while parsing a markdown file or the `DB.md` config.
24#[derive(Debug, thiserror::Error)]
25pub enum ParseError {
26    /// The frontmatter block was not valid YAML. Maps to validate code
27    /// `FM_MALFORMED_YAML`.
28    #[error("malformed YAML frontmatter in {file}: {source}")]
29    MalformedYaml {
30        /// The file whose frontmatter failed to parse.
31        file: PathBuf,
32        /// The underlying YAML error.
33        source: serde_norway::Error,
34    },
35
36    /// The file has no `---`-delimited frontmatter block at its very start.
37    #[error("missing frontmatter block in {file}")]
38    MissingFrontmatter {
39        /// The offending file.
40        file: PathBuf,
41    },
42
43    /// A required field was absent. Maps to validate code `FM_MISSING_TYPE`
44    /// (for `type`) and the per-type required-field codes.
45    #[error("missing required field '{key}' in {file}")]
46    MissingField {
47        /// The file missing the field.
48        file: PathBuf,
49        /// The required key.
50        key: String,
51    },
52
53    /// A timestamp field was not ISO-8601 / RFC3339. Maps to `FM_BAD_TIMESTAMP`.
54    #[error("bad timestamp in field '{key}' of {file}: {value}")]
55    BadTimestamp {
56        /// The file.
57        file: PathBuf,
58        /// The frontmatter key.
59        key: String,
60        /// The unparseable value.
61        value: String,
62    },
63
64    /// An I/O error reading the file.
65    #[error(transparent)]
66    Io(#[from] std::io::Error),
67}
68
69/// The parsed YAML frontmatter of a db.md file.
70///
71/// The universal-contract fields are typed accessors; everything else lands in
72/// [`extra`](Frontmatter::extra) as ambient context (unknown-field passthrough)
73/// and is round-tripped verbatim. The atomic writer re-emits keys in canonical
74/// order: `type`, `id`, `created`, `updated`, `summary` first, then
75/// type-specific fields, then `status` / `tags`.
76#[derive(Debug, Clone, Default, PartialEq)]
77pub struct Frontmatter {
78    /// `type` — required on content files; the primary query key.
79    pub type_: Option<String>,
80    /// `meta-type` — records-only; the epistemic class `fact`/`operational`/
81    /// `conclusion`. Absent ⇒ `fact` (the effective default is applied by the
82    /// index/query layer for record-layer files; sources carry none).
83    pub meta_type: Option<String>,
84    /// `id` — optional; derived from the file path when absent.
85    pub id: Option<String>,
86    /// `created` — RFC3339; required and auto-set on content-file create.
87    pub created: Option<DateTime<FixedOffset>>,
88    /// `updated` — RFC3339; required and auto-maintained on content files.
89    pub updated: Option<DateTime<FixedOffset>>,
90    /// `summary` — the one-line catalog line; required on every content file.
91    pub summary: Option<String>,
92    /// `status` — optional lifecycle state.
93    pub status: Option<String>,
94    /// `tags` — optional flat list of short scalar labels.
95    pub tags: Vec<String>,
96    /// All other frontmatter keys (type-specific + custom), preserved verbatim
97    /// in insertion-stable sorted order. Wiki-link-valued fields keep their raw
98    /// YAML form here; [`Frontmatter::link_fields`] surfaces them as
99    /// [`WikiLink`]s.
100    pub extra: BTreeMap<String, Value>,
101}
102
/// Reports whether `s` holds `min` or more ASCII digits in a row.
///
/// This is the cheap pre-filter for [`quote_oversized_integers`]: an
/// out-of-range literal needs a long digit run (`i64::MAX` has 19 digits,
/// `u64::MAX` has 20), so input without one can skip the line-by-line pass.
fn has_long_digit_run(s: &str, min: usize) -> bool {
    // Length of the digit streak that ends at the byte being examined.
    let mut streak = 0usize;
    s.bytes().any(|byte| {
        let is_digit = byte.is_ascii_digit();
        streak = if is_digit { streak + 1 } else { 0 };
        // Only a digit can complete a run; any other byte merely resets it.
        is_digit && streak >= min
    })
}
120
/// Reports whether `s` (surrounding whitespace ignored) is a bare decimal
/// integer literal that fits neither `i64` nor `u64`.
///
/// Those are exactly the literals `serde_norway` cannot hold losslessly: it
/// rejects `(u64::MAX, u128::MAX]` and silently degrades anything larger to
/// `f64`. An optional leading `+` / `-` and `_` digit separators are
/// accepted. A multi-digit literal with a leading zero is never matched, so an
/// octal-looking or otherwise ambiguous scalar is not reinterpreted.
fn is_oversized_int_literal(s: &str) -> bool {
    let literal = s.trim();
    let (negative, unsigned) = if let Some(stripped) = literal.strip_prefix('-') {
        (true, stripped)
    } else {
        (false, literal.strip_prefix('+').unwrap_or(literal))
    };

    // Anything other than digits and `_` separators disqualifies the literal.
    if unsigned.bytes().any(|b| !(b.is_ascii_digit() || b == b'_')) {
        return false;
    }
    let digits: String = unsigned.chars().filter(|c| *c != '_').collect();
    // Covers empty input, a lone sign, and an underscores-only body.
    if digits.is_empty() {
        return false;
    }
    // `007`-style literals are ambiguous between YAML versions (octal, int or
    // string); leave them alone.
    if digits.len() > 1 && digits.starts_with('0') {
        return false;
    }

    // In-range values are represented losslessly and must stay untouched.
    let fits = if negative {
        format!("-{digits}").parse::<i64>().is_ok()
    } else {
        digits.parse::<i64>().is_ok() || digits.parse::<u64>().is_ok()
    };
    !fits
}
158
/// Locates the byte offset at which the scalar value starts on a simple block
/// line: `key: <value>`, `- <value>` or `- key: <value>`.
///
/// Returns `None` when the line has no inline value: it is blank, a lone `-`,
/// or a `key:` whose value lives on the following lines. A line that has
/// neither a `- ` marker nor a `key: ` separator is treated as being all
/// value, so the offset of its first non-blank byte is returned.
fn scalar_value_start(content: &str) -> Option<usize> {
    // `tail` is always a suffix of `content`, so any position inside it maps
    // back to `content.len() - <remaining length>`.
    let mut tail = content.trim_start();
    // Peel block-sequence markers, including nested ones such as `- - x`.
    while let Some(item) = tail.strip_prefix("- ") {
        tail = item.trim_start_matches(' ');
    }
    if tail.is_empty() || tail == "-" {
        return None;
    }

    match tail.split_once(':') {
        // `key:` and nothing else: the value is on the lines below.
        Some((_, "")) => None,
        // `key: value`: the first `:` followed by a space or tab is the separator.
        Some((_, after)) if after.starts_with([' ', '\t']) => {
            let value = after.trim_start_matches([' ', '\t']);
            Some(content.len() - value.len())
        }
        // No usable separator: the whole remainder is the scalar.
        _ => Some(content.len() - tail.len()),
    }
}
189
190/// True if `content` introduces a YAML block scalar (`key: |`, `- >2`, …): the
191/// value region begins with a `|` or `>` indicator. Its body must be skipped by
192/// [`quote_oversized_integers`] so a digit line inside literal text is untouched.
193fn introduces_block_scalar(content: &str) -> bool {
194    match scalar_value_start(content) {
195        Some(start) => {
196            let v = content[start..].trim_start();
197            v.starts_with('|') || v.starts_with('>')
198        }
199        None => false,
200    }
201}
202
203/// Quote an oversized bare-integer value on a single block line, returning the
204/// rewritten line, or `None` if the line carries no such value.
205fn quote_int_value_in_line(content: &str) -> Option<String> {
206    let value_start = scalar_value_start(content)?;
207    let region = &content[value_start..];
208    let value = region.trim_end();
209    if !is_oversized_int_literal(value) {
210        return None;
211    }
212    // A pure digit literal contains no `'`, so single-quoting needs no escaping.
213    let trailing = &region[value.len()..];
214    Some(format!(
215        "{}'{}'{}",
216        &content[..value_start],
217        value,
218        trailing
219    ))
220}
221
222/// Pre-quote bare integer literals beyond the `i64`/`u64` range so they parse as
223/// STRING scalars and round-trip verbatim.
224///
225/// `serde_norway` (no arbitrary-precision) cannot represent such an integer: it
226/// rejects `(u64::MAX, u128::MAX]` as a hard parse error and silently truncates
227/// `> u128::MAX` to `f64` (`999…9` → `1e39` on the next re-emit) — corrupting an
228/// imported numeric ID and breaking the SPEC guarantee that unknown fields
229/// round-trip byte-for-byte. Quoting them up front makes them string-valued (the
230/// type narrows from number to string, but no data is destroyed).
231///
232/// Conservative: only a single-line `key: <int>` / `- <int>` / `- key: <int>`
233/// value that is a canonical decimal integer beyond `i64`/`u64` is quoted; block
234/// scalars are tracked and never touched; anything already in range, quoted, or
235/// not a bare integer is left exactly as written.
236fn quote_oversized_integers(yaml: &str) -> std::borrow::Cow<'_, str> {
237    if !has_long_digit_run(yaml, 19) {
238        return std::borrow::Cow::Borrowed(yaml);
239    }
240    let mut out = String::with_capacity(yaml.len());
241    let mut changed = false;
242    let mut block_indent: Option<usize> = None;
243    for line in yaml.split_inclusive('\n') {
244        let content = line.trim_end_matches(['\r', '\n']);
245        let term = &line[content.len()..];
246        let indent = content.len() - content.trim_start().len();
247
248        // Inside a block scalar: emit verbatim until a non-blank line dedents to
249        // at or before the introducer's key indent.
250        if let Some(key_indent) = block_indent {
251            if content.trim().is_empty() || indent > key_indent {
252                out.push_str(line);
253                continue;
254            }
255            block_indent = None; // block ended; process this line normally
256        }
257        if introduces_block_scalar(content) {
258            block_indent = Some(indent);
259            out.push_str(line);
260            continue;
261        }
262        match quote_int_value_in_line(content) {
263            Some(rewritten) => {
264                out.push_str(&rewritten);
265                out.push_str(term);
266                changed = true;
267            }
268            None => out.push_str(line),
269        }
270    }
271    if changed {
272        std::borrow::Cow::Owned(out)
273    } else {
274        std::borrow::Cow::Borrowed(yaml)
275    }
276}
277
278impl Frontmatter {
279    /// Parse a YAML frontmatter block (the text between the opening and closing
280    /// `---` fences, exclusive) into a [`Frontmatter`].
281    ///
282    /// Lenient on unknown keys (they go to [`extra`](Frontmatter::extra));
283    /// returns [`ParseError::MalformedYaml`] only on YAML that doesn't parse.
284    pub fn parse(yaml: &str, file: &Path) -> Result<Self, ParseError> {
285        // An empty (or whitespace-only) frontmatter block is a valid, empty
286        // mapping — not a YAML error.
287        let value: Value = if yaml.trim().is_empty() {
288            Value::Mapping(Mapping::new())
289        } else {
290            // Preserve integer literals beyond i64/u64 range: serde_norway would
291            // otherwise reject `(u64,u128]` or silently truncate `>u128` to f64,
292            // corrupting imported numeric IDs. Quoting them up front makes them
293            // round-trip verbatim as strings.
294            let prepared = quote_oversized_integers(yaml);
295            serde_norway::from_str(&prepared).map_err(|source| ParseError::MalformedYaml {
296                file: file.to_path_buf(),
297                source,
298            })?
299        };
300
301        // Top-level frontmatter must be a mapping. A scalar or sequence at the
302        // top level is malformed for our purposes; surface it as such.
303        let map = match value {
304            Value::Mapping(m) => m,
305            Value::Null => Mapping::new(),
306            other => {
307                // serde_norway::Error has no public constructor, so let the
308                // deserializer decide: a value that coerces to a Mapping (e.g. a
309                // YAML-tagged mapping `!tag\n k: v`, where the tag is ambient) is
310                // accepted as that mapping; a genuine scalar or sequence top
311                // level fails to coerce and IS the malformed case. (Using a
312                // match here, not `expect_err`, avoids a panic on the
313                // tagged-mapping case, which deserializes to a Mapping just
314                // fine.)
315                match serde_norway::from_value::<Mapping>(other) {
316                    Ok(m) => m,
317                    Err(source) => {
318                        return Err(ParseError::MalformedYaml {
319                            file: file.to_path_buf(),
320                            source,
321                        });
322                    }
323                }
324            }
325        };
326
327        let mut fm = Frontmatter::default();
328        for (k, v) in map {
329            let key = match k.as_str() {
330                Some(s) => s.to_string(),
331                // Non-string keys (`2026:`, `true:`, `3.14:`) are unusual but
332                // valid YAML; per SPEC § "Unknown fields pass through" they must
333                // not be corrupted on re-emit. Stringify them through the YAML
334                // scalar emitter — `2026`, `true`, `3.14` — NOT the Rust `Debug`
335                // formatter (which produced `Number(2026)`, `Bool(true)`, …), so
336                // the key text survives. `extra` is `String`-keyed, so on the
337                // write side the key re-emits as a quoted-string key carrying that
338                // text (e.g. `'2026':`) — the type narrows from number to string,
339                // but the data is no longer destroyed and ordinary string keys are
340                // wholly unaffected.
341                None => yaml_scalar_key(&k),
342            };
343            match key.as_str() {
344                // Coerce scalar values rather than `v.as_str()` (which is None
345                // for Number/Bool/Null). A bare scalar that YAML reads as a
346                // non-string — `summary: 2026`, `id: 100`, `status: 0` — would
347                // otherwise be set to None AND dropped (it is a matched arm, so
348                // the raw value never reaches `extra`), and `to_yaml` then omits
349                // the None field, so `dbmd format` (read_file -> write_file)
350                // silently deletes the line from disk. `scalar_string` mirrors
351                // the coercion `validate`/`store` already apply to these fields,
352                // so a numeric/bool-looking scalar is preserved as its string
353                // form and round-trips instead of being destroyed.
354                //
355                // A sequence/mapping value on a universal key (`status: [a, b]`,
356                // a nested-mapping `summary:`) is NOT a valid scalar; rather than
357                // let the matched arm consume-and-drop it (silent data loss on
358                // the next re-emit), `scalar_string` returns None and we fall
359                // through to preserving the raw value in `extra` so `to_yaml`
360                // re-emits it verbatim. The universal accessors stay None (the
361                // value was never a valid scalar for that field), but the
362                // operator's bytes are never destroyed.
363                "type" => match scalar_string(&v) {
364                    Some(s) => fm.type_ = Some(s),
365                    None => {
366                        fm.extra.insert(key, v);
367                    }
368                },
369                "meta-type" => match scalar_string(&v) {
370                    Some(s) => fm.meta_type = Some(s),
371                    None => {
372                        fm.extra.insert(key, v);
373                    }
374                },
375                "id" => match scalar_string(&v) {
376                    Some(s) => fm.id = Some(s),
377                    None => {
378                        fm.extra.insert(key, v);
379                    }
380                },
381                "created" => fm.created = parse_timestamp(&v, "created", file)?,
382                "updated" => fm.updated = parse_timestamp(&v, "updated", file)?,
383                "summary" => match scalar_string(&v) {
384                    Some(s) => fm.summary = Some(s),
385                    None => {
386                        fm.extra.insert(key, v);
387                    }
388                },
389                "status" => match scalar_string(&v) {
390                    Some(s) => fm.status = Some(s),
391                    None => {
392                        fm.extra.insert(key, v);
393                    }
394                },
395                "tags" => match parse_tags_preserving(&v) {
396                    Ok(tags) => fm.tags = tags,
397                    // A `tags` value with a non-scalar item (`tags: [[vip]]`,
398                    // `tags: [a, [b]]`) is preserved verbatim in `extra` rather
399                    // than silently filtered down / erased on re-emit. The typed
400                    // `tags` vec stays empty (no valid scalar list was present),
401                    // so `to_yaml` won't ALSO emit a `tags:` from the vec.
402                    Err(raw) => {
403                        fm.extra.insert(key, raw);
404                    }
405                },
406                _ => {
407                    fm.extra.insert(key, v);
408                }
409            }
410        }
411        Ok(fm)
412    }
413
414    /// Serialize the frontmatter back to a YAML block (no `---` fences) in
415    /// canonical key order. Round-trips [`extra`](Frontmatter::extra) verbatim.
416    pub fn to_yaml(&self) -> String {
417        // Build an order-preserving mapping in canonical key order:
418        //   type, meta-type, id, created, updated, summary  (universal head)
419        //   <type-specific extra, BTreeMap-sorted>
420        //   status, tags                          (universal tail)
421        // serde_norway::Mapping preserves insertion order, so one serialize call
422        // emits the block in exactly this order with correct YAML quoting.
423        let mut map = Mapping::new();
424
425        if let Some(t) = &self.type_ {
426            map.insert(Value::String("type".into()), Value::String(t.clone()));
427        }
428        if let Some(mt) = &self.meta_type {
429            map.insert(Value::String("meta-type".into()), Value::String(mt.clone()));
430        }
431        if let Some(id) = &self.id {
432            map.insert(Value::String("id".into()), Value::String(id.clone()));
433        }
434        if let Some(created) = &self.created {
435            map.insert(
436                Value::String("created".into()),
437                Value::String(created.to_rfc3339()),
438            );
439        }
440        if let Some(updated) = &self.updated {
441            map.insert(
442                Value::String("updated".into()),
443                Value::String(updated.to_rfc3339()),
444            );
445        }
446        if let Some(summary) = &self.summary {
447            map.insert(
448                Value::String("summary".into()),
449                Value::String(summary.clone()),
450            );
451        }
452
453        // Type-specific + custom fields, in BTreeMap (sorted) order. Each value
454        // is canonicalized so a wiki-link round-trips to the form the writer and
455        // `dbmd validate` agree on — critically, the SPEC-canonical *unquoted*
456        // scalar `field: [[x]]` (which YAML parses to a nested `Seq[Seq[String]]`)
457        // is re-emitted as a quoted scalar `'[[x]]'` instead of the bracket-less
458        // block sequence `- - x` that a verbatim re-emit would produce and that
459        // destroys the link. See [`canonicalize_extra_value`].
460        for (k, v) in &self.extra {
461            map.insert(Value::String(k.clone()), canonicalize_extra_value(v));
462        }
463
464        if let Some(status) = &self.status {
465            map.insert(
466                Value::String("status".into()),
467                Value::String(status.clone()),
468            );
469        }
470        if !self.tags.is_empty() {
471            map.insert(
472                Value::String("tags".into()),
473                Value::Sequence(self.tags.iter().cloned().map(Value::String).collect()),
474            );
475        }
476
477        if map.is_empty() {
478            return String::new();
479        }
480        serde_norway::to_string(&Value::Mapping(map)).unwrap_or_default()
481    }
482
483    /// True if the file is content (under `sources/` or `records/`)
484    /// and not an `index.md`. Used by validate to decide which files require a
485    /// `summary`. Meta files (`DB.md`, `index.md`, `log.md`) return false.
486    pub fn is_content_file(path: &Path) -> bool {
487        // index.md is a meta file at every level, never content.
488        if path.file_name().and_then(|n| n.to_str()) == Some("index.md") {
489            return false;
490        }
491        // Content iff some path component is one of the two layer dirs. This
492        // works for both store-relative (`sources/emails/x.md`) and absolute
493        // (`/home/db/sources/emails/x.md`) paths. DB.md / log.md sit at the
494        // root, under no layer, so they fall through to false.
495        path.components().any(|c| {
496            c.as_os_str()
497                .to_str()
498                .is_some_and(|s| LAYER_DIRS.contains(&s))
499        })
500    }
501
502    /// Resolve the file's effective `id`: the explicit `id` field if present,
503    /// otherwise derived from the store-relative path (filename without `.md`).
504    pub fn effective_id(&self, store_relative_path: &Path) -> String {
505        if let Some(id) = &self.id {
506            if !id.is_empty() {
507                return id.clone();
508            }
509        }
510        // Derived id = filename without the `.md` extension.
511        store_relative_path
512            .file_stem()
513            .and_then(|s| s.to_str())
514            .unwrap_or_default()
515            .to_string()
516    }
517
518    /// The effective `meta-type` for a record: the declared value, or `fact`
519    /// when absent. Records only — sources carry no meta-type; callers apply
520    /// this only to record-layer files.
521    pub fn effective_meta_type(&self) -> &str {
522        self.meta_type.as_deref().unwrap_or("fact")
523    }
524
525    /// Read a single frontmatter key as a raw YAML [`Value`], looking in the
526    /// typed fields first and then [`extra`](Frontmatter::extra).
527    pub fn get(&self, key: &str) -> Option<Value> {
528        match key {
529            "type" => self.type_.clone().map(Value::String),
530            "meta-type" => self.meta_type.clone().map(Value::String),
531            "id" => self.id.clone().map(Value::String),
532            "created" => self.created.map(|d| Value::String(d.to_rfc3339())),
533            "updated" => self.updated.map(|d| Value::String(d.to_rfc3339())),
534            "summary" => self.summary.clone().map(Value::String),
535            "status" => self.status.clone().map(Value::String),
536            "tags" => {
537                if self.tags.is_empty() {
538                    None
539                } else {
540                    Some(Value::Sequence(
541                        self.tags.iter().cloned().map(Value::String).collect(),
542                    ))
543                }
544            }
545            _ => self.extra.get(key).cloned(),
546        }
547    }
548
549    /// Set a single frontmatter key from a string value, routing universal-
550    /// contract keys to their typed fields and everything else to
551    /// [`extra`](Frontmatter::extra). Used by `dbmd fm set`.
552    pub fn set(&mut self, key: &str, value: &str) -> Result<(), ParseError> {
553        match key {
554            "type" => self.type_ = Some(value.to_string()),
555            "meta-type" => self.meta_type = Some(value.to_string()),
556            "id" => self.id = Some(value.to_string()),
557            "created" => {
558                self.created = Some(parse_rfc3339(value, "created", Path::new("<fm set>"))?)
559            }
560            "updated" => {
561                self.updated = Some(parse_rfc3339(value, "updated", Path::new("<fm set>"))?)
562            }
563            "summary" => self.summary = Some(value.to_string()),
564            "status" => self.status = Some(value.to_string()),
565            "tags" => {
566                // Accept either a YAML flow list (`[a, b]`) or a single scalar
567                // tag. Anything that parses to a sequence becomes the tag list;
568                // otherwise the whole string is one tag.
569                self.tags = match serde_norway::from_str::<Value>(value) {
570                    Ok(Value::Sequence(seq)) => parse_tags(&Value::Sequence(seq)),
571                    _ => vec![value.to_string()],
572                };
573            }
574            _ => {
575                // A custom / type-specific field. The value is a scalar string by
576                // default, but the spec's list-valued link fields (e.g.
577                // `meeting.attendees`, SPEC § Linking) must serialize as a YAML
578                // block sequence of quoted wiki-links — never the flow-form string
579                // `"[[[a]], [[b]]]"`, which `dbmd validate` rejects as
580                // `WIKI_LINK_FLOW_FORM_LIST`. When the value parses as a YAML
581                // sequence whose every item is a clean single wiki-link, store the
582                // canonical sequence so `to_yaml` emits block form. Everything else
583                // — plain text, and a single inline `[[x]]` (which YAML reads as a
584                // nested `Seq[Seq[String]]`, not a list of link strings) — stays a
585                // verbatim scalar string, preserving the prior behavior.
586                let stored = parse_link_list_value(value)
587                    .unwrap_or_else(|| Value::String(value.to_string()));
588                self.extra.insert(key.to_string(), stored);
589            }
590        }
591        Ok(())
592    }
593
594    /// Extract every frontmatter field whose value is a wiki-link (scalar
595    /// inline form or a block-sequence list), pairing each with its key. The
596    /// validate engine checks these against `(link)` schema annotations.
597    pub fn link_fields(&self) -> Vec<(String, WikiLink)> {
598        let mut out = Vec::new();
599        // `summary` may carry navigational wiki-links (spec encourages it).
600        if let Some(summary) = &self.summary {
601            for link in extract_wiki_links(summary, Path::new("")) {
602                out.push(("summary".to_string(), link));
603            }
604        }
605        // Every type-specific / custom field: a scalar wiki-link or a list of
606        // wiki-links, in either the quoted (`"[[x]]"`) or the canonical unquoted
607        // (`[[x]]`) form. See [`links_in_field_value`] for the YAML shapes.
608        for (key, value) in &self.extra {
609            for link in links_in_field_value(value) {
610                out.push((key.clone(), link));
611            }
612        }
613        out
614    }
615}
616
/// One in-store wiki-link, written `[[target]]` or `[[target|display]]`.
///
/// The `target` is stored exactly as written.
/// [`is_full_path`](WikiLink::is_full_path) tells the doctrine-conforming
/// full store-relative path apart from a short-form target, which is a
/// validation error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiLink {
    /// Target text as written, minus the `[[ ]]` and any `|display` part.
    pub target: String,
    /// Display-text override given after `|`, if any.
    pub display: Option<String>,
    /// `true` for a full store-relative path (it contains a `/` and resolves
    /// under a known layer); `false` for a short form such as `sarah-chen`,
    /// which validate reports as `WIKI_LINK_SHORT_FORM`.
    pub is_full_path: bool,
    /// `true` when `target` ends in `.md`. Validate warns
    /// `WIKI_LINK_HAS_EXTENSION`; the canonical writers emit the bare form.
    pub has_md_extension: bool,
    /// Position of the link as `(file, line, col)`; line and column are
    /// 1-based.
    pub location: (PathBuf, u32, u32),
}
638
/// An ordinary markdown link, `[text](url)`, i.e. an external reference.
///
/// These are collected in their own stream, apart from [`WikiLink`], so the
/// toolkit can see external targets without mixing them up with in-store
/// edges. They are not graph-validated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownLink {
    /// Text between `[` and `]`.
    pub text: String,
    /// URL or path between `(` and `)`.
    pub url: String,
    /// Position of the link as `(file, line, col)`, 1-based.
    pub location: (PathBuf, u32, u32),
}
651
/// One `##` / `###` section of a markdown body: its heading together with the
/// stretch of body it covers, from the heading line up to (but excluding) the
/// next heading of equal or shallower depth.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Section {
    /// Heading text with the leading `#`s removed.
    pub heading: String,
    /// Depth of the heading, i.e. how many `#`s introduce it.
    pub level: u8,
    /// 1-based line number of the heading.
    pub line: u32,
    /// Text of the section, starting at the heading line and stopping before
    /// the next sibling-or-shallower heading; taken from the original body.
    pub body: String,
}
667
/// The parsed structured content of a store's `DB.md` config file.
///
/// Every part is optional in the source; an absent section leaves the
/// corresponding field at its `Default` value (`None` / empty collection).
/// Produced by [`parse_db_md`].
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Config {
    /// Body of the `## Agent instructions` section — free-form prose passed to
    /// the agent's system prompt. `None` when the section is absent or its
    /// prose is empty.
    pub agent_instructions: Option<String>,
    /// `## Policies` → `### Frozen pages`: store-relative paths the toolkit
    /// refuses to write (`POLICY_FROZEN_PAGE`). Match against these through
    /// [`Config::frozen_match`] rather than comparing paths directly. If the
    /// sub-section appears more than once, the last occurrence replaces the
    /// earlier ones.
    pub frozen_pages: Vec<PathBuf>,
    /// `## Policies` → `### Ignored types`: type names the curator never
    /// synthesizes (still readable as ambient context). If the sub-section
    /// appears more than once, the last occurrence replaces the earlier ones.
    pub ignored_types: Vec<String>,
    /// `## Schemas` → one entry per `### <type>` sub-section, keyed by the
    /// heading text as written (trimmed, case preserved). A repeated type
    /// heading replaces the earlier entry.
    pub schemas: BTreeMap<String, Schema>,
    /// `## Folders` → optional per-folder display + description, surfaced in the
    /// root + layer `index.md` rollups. Agent-authored; the tool never invents a
    /// folder's description (absent ⇒ the rollup shows counts only). Keyed by the
    /// store-relative, unix-slash folder path (e.g. `records/contacts`).
    pub folders: BTreeMap<String, FolderMeta>,
}
691
/// Agent-authored display + description for one type-folder, declared in
/// `DB.md ## Folders` and surfaced in the root/layer `index.md` rollups. Both
/// fields are optional: `display` overrides the rollup's derived folder name
/// (for casing the tool can't guess, e.g. acronyms like HubSpot); `description`
/// is the one-line "what's in here" the rollup shows. The tool only ever
/// *surfaces* these — it never composes a folder description from the folder's
/// contents (that would be the tool inventing the curator's judgment).
///
/// Stored as the values of [`Config::folders`], one per folder path; the
/// `Default` value has both fields `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FolderMeta {
    /// Display-name override (absent ⇒ derived from the folder basename).
    pub display: Option<String>,
    /// One-line folder description shown in the rollup (absent ⇒ counts only).
    pub description: Option<String>,
}
706
707impl Config {
708    /// The `### Frozen pages` entry that matches a store-relative `target`, if
709    /// any. The **single** frozen-page matcher every write surface must funnel
710    /// through so the policy is enforced identically on `write` / `fm set` /
711    /// `fm init` / `link` / `rename` / `format`.
712    ///
713    /// Comparison is normalized so a policy line and a write target match
714    /// regardless of incidental spelling differences:
715    /// - `/` path separators on every OS,
716    /// - a single leading `./` dropped,
717    /// - a trailing `.md` dropped on **both** sides — `parse_db_md` stores
718    ///   frozen entries verbatim, so an operator who writes the natural
719    ///   extensionless spelling (`records/decisions/q1`) must protect the file
720    ///   (`records/decisions/q1.md`) exactly as the `.md` spelling does.
721    ///
722    /// Returns the matched config entry verbatim (its original spelling) so the
723    /// caller can name it in the `POLICY_FROZEN_PAGE` refusal.
724    pub fn frozen_match(&self, target: &Path) -> Option<PathBuf> {
725        let want = normalize_frozen_path(target);
726        self.frozen_pages
727            .iter()
728            .find(|frozen| {
729                let pat = normalize_frozen_path(frozen);
730                // A literal entry matches by exact normalized equality; an entry
731                // carrying a `*`/`**` glob matches by segment-wise glob so a
732                // pattern like `records/decisions/*` actually protects the
733                // concrete files under it instead of silently failing open.
734                if pat.contains('*') {
735                    frozen_glob_matches(&pat, &want)
736                } else {
737                    pat == want
738                }
739            })
740            .cloned()
741    }
742
743    /// True if `target` (store-relative) is a frozen page. Convenience wrapper
744    /// over [`Config::frozen_match`] for callers that only need presence.
745    pub fn is_frozen(&self, target: &Path) -> bool {
746        self.frozen_match(target).is_some()
747    }
748}
749
/// Normalize a path for frozen-page comparison.
///
/// Both the policy entry and the write target pass through this before the
/// equality/glob check, so the match is insensitive to:
/// - the platform path separator (segments are re-joined with `/`),
/// - `.` segments and a leading `/` or drive prefix (dropped),
/// - `..` segments (resolved lexically against the preceding segment; a `..`
///   with nothing left to pop is dropped, so the result never climbs above
///   the store root),
/// - a trailing `.md` (dropped).
///
/// Resolving `..` matters for the policy: if it were merely discarded, a
/// target spelled `records/decisions/../decisions/q1.md` would normalize to
/// `records/decisions/decisions/q1`, never equal the frozen
/// `records/decisions/q1`, and the freeze would silently fail OPEN. The same
/// reasoning is why a leading `/` is dropped rather than kept.
///
/// Segments that are not valid UTF-8 are kept (converted lossily) instead of
/// being skipped, so two different paths cannot collapse into the same
/// normalized string just because one of them has an undecodable segment.
fn normalize_frozen_path(p: &Path) -> String {
    use std::borrow::Cow;
    use std::path::Component;

    let mut segments: Vec<Cow<'_, str>> = Vec::new();
    for component in p.components() {
        match component {
            Component::Normal(segment) => segments.push(segment.to_string_lossy()),
            // Lexical parent: undo the previous segment. Popping an empty
            // stack is a no-op, which clamps the path at the store root.
            Component::ParentDir => {
                segments.pop();
            }
            // A leading `/`, a drive prefix, and `.` carry no store-relative
            // information.
            Component::RootDir | Component::Prefix(_) | Component::CurDir => {}
        }
    }

    let unix = segments.join("/");
    match unix.strip_suffix(".md") {
        Some(stem) => stem.to_string(),
        None => unix,
    }
}
775
776/// Match a normalized frozen-page glob `pat` against a normalized target `path`,
777/// segment by segment. `*` matches any run of characters *within a single path
778/// segment* (never crossing `/`); `**` as a whole segment matches zero or more
779/// whole segments. Both sides are already `normalize_frozen_path`-normalized, so
780/// this only deals with `/`-joined segment text. Keeps the substrate dependency-
781/// free (no glob crate) while making `records/decisions/*` actually freeze the
782/// files beneath it instead of failing open.
783fn frozen_glob_matches(pat: &str, path: &str) -> bool {
784    let pat_segs: Vec<&str> = pat.split('/').collect();
785    let path_segs: Vec<&str> = path.split('/').collect();
786    glob_segments(&pat_segs, &path_segs)
787}
788
789/// Recursive segment matcher for [`frozen_glob_matches`]. `**` consumes any
790/// number of path segments; every other pattern segment must match exactly one
791/// path segment (with `*` wildcards inside it).
792fn glob_segments(pat: &[&str], path: &[&str]) -> bool {
793    match pat.split_first() {
794        None => path.is_empty(),
795        Some((&"**", rest_pat)) => {
796            // `**` matches zero segments here, or one-or-more by consuming a path
797            // segment and recursing on the same `**`.
798            if glob_segments(rest_pat, path) {
799                return true;
800            }
801            !path.is_empty() && glob_segments(pat, &path[1..])
802        }
803        Some((&first_pat, rest_pat)) => match path.split_first() {
804            Some((&first_path, rest_path)) => {
805                glob_segment_text(first_pat, first_path) && glob_segments(rest_pat, rest_path)
806            }
807            None => false,
808        },
809    }
810}
811
/// Compare one glob pattern segment with one path segment.
///
/// `*` stands for any (possibly empty) run of characters inside the segment;
/// every other character must match literally. A pattern without `*` is a
/// plain equality test.
fn glob_segment_text(pat: &str, seg: &str) -> bool {
    // No wildcard at all: literal comparison.
    let Some((prefix, after_first_star)) = pat.split_once('*') else {
        return pat == seg;
    };

    // Text before the first `*` is anchored at the start of the segment.
    let Some(mut remaining) = seg.strip_prefix(prefix) else {
        return false;
    };

    // Text after the last `*` is anchored at the end; whatever sits between
    // the first and last `*` is a run of interior literals.
    let (interior, suffix) = match after_first_star.rsplit_once('*') {
        Some((middle, tail)) => (middle, tail),
        None => ("", after_first_star),
    };

    // Each interior literal must occur, in order, in the unconsumed text.
    // Taking the leftmost occurrence leaves the most room for later literals.
    for literal in interior.split('*').filter(|lit| !lit.is_empty()) {
        match remaining.find(literal) {
            Some(at) => remaining = &remaining[at + literal.len()..],
            None => return false,
        }
    }

    // The suffix must fit in the unconsumed text (an empty suffix always does).
    remaining.ends_with(suffix)
}
845
/// A user-declared type schema parsed from a `DB.md` `### <type>` sub-section.
/// The store's `## Schemas` is the **only** source of schema enforcement — the
/// toolkit ships no built-in or implicit per-type schema (see SPEC § Schemas).
///
/// Built bullet-by-bullet in [`parse_db_md`]; the `Default` value (no fields,
/// no directives) is what a `### <type>` heading with no bullets produces.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Schema {
    /// One [`FieldSpec`] per bulleted field line, in source order.
    pub fields: Vec<FieldSpec>,
    /// `- unique: <field>[, <field> …]` directives — each inner vec is one
    /// uniqueness constraint over the listed field(s) (compound when >1). Two
    /// records of this type whose listed values collide warn as
    /// `DUP_UNIQUE_KEY`. A `unique:` directive that lists no fields is ignored
    /// by the parser, so no inner vec is empty.
    pub unique_keys: Vec<Vec<String>>,
    /// `- summary_template: <template>` directive — the `{field}` interpolation
    /// pattern `dbmd fm init` / `dbmd write` use to compose a default `summary`
    /// for this type. `None` falls back to the body's first paragraph. An empty
    /// template is ignored by the parser; when the directive is repeated the
    /// last non-empty one wins.
    pub summary_template: Option<String>,
    /// `- shard: by-date | flat` directive — whether records of this type are
    /// date-sharded on disk (`records/<type>/<YYYY>/<MM>/…`) or kept flat.
    /// `None` = no directive declared, so the store's built-in default for the
    /// type applies ([`crate::store::Store::type_shards`]); `Some(true)` forces
    /// date-sharding (e.g. a custom event type the toolkit has no built-in for);
    /// `Some(false)` forces flat. This is the v0.2 generic-model way to declare
    /// sharding — the toolkit ships no implicit per-type behavior beyond the
    /// example-type defaults. A `shard:` directive with an unrecognized value
    /// is ignored and leaves this field unchanged.
    pub shard: Option<bool>,
}
872
/// One field declaration inside a [`Schema`]: `- <name> (<modifiers>)`.
///
/// Modifiers are comma-separated inside the parens; this captures the
/// recognized ones as typed fields and stashes anything unrecognized in
/// [`unknown_modifiers`](FieldSpec::unknown_modifiers) (surfaced as `Info`).
///
/// The `Default` value is an unnamed, optional field with no shape, link
/// prefix, default, enum, or unknown modifiers.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct FieldSpec {
    /// The field name.
    pub name: String,
    /// `required` modifier present.
    pub required: bool,
    /// The shape modifier (`string`/`int`/`bool`/`date`/`email`/`currency`/
    /// `url`), if any. `None` means the line declared no shape.
    pub shape: Option<Shape>,
    /// `link to <prefix>/` — the store-relative prefix a wiki-link target must
    /// start with. The trailing slash is required in the source syntax.
    pub link_prefix: Option<PathBuf>,
    /// `default <value>` — the value written when the field is absent, held as
    /// a YAML [`Value`].
    pub default: Option<Value>,
    /// `enum: <v1>, <v2>, ...` — the allowed values (must be the last modifier
    /// on the line because of its own commas).
    pub enum_values: Option<Vec<String>>,
    /// Any modifiers not in the recognized vocabulary, preserved verbatim;
    /// validate surfaces these as `Info`, never errors.
    pub unknown_modifiers: Vec<String>,
}
899
/// A recognized shape modifier for a schema field. Validate enforces the
/// corresponding value shape (`SCHEMA_SHAPE_MISMATCH` on violation).
///
/// NOTE(review): the variants presumably correspond one-to-one to the
/// `string`/`int`/`bool`/`date`/`email`/`currency`/`url` modifier keywords
/// listed on [`FieldSpec::shape`]; the keyword-to-variant mapping lives in the
/// field-bullet parser, outside this definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Shape {
    /// Any scalar string.
    String,
    /// Integer.
    Int,
    /// Boolean.
    Bool,
    /// RFC3339 / ISO-8601 date.
    Date,
    /// `<local>@<domain>` email address.
    Email,
    /// A currency amount.
    Currency,
    /// A URL.
    Url,
}
919
/// The result of splitting a raw file into its frontmatter block and body.
/// Produced by [`split_frontmatter`].
///
/// `body` is the verbatim remainder after the closing `---` fence — the writer
/// preserves it byte-for-byte so operator edits are never reflowed. Both
/// fields are owned copies of the relevant slices of the input text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFile {
    /// The raw frontmatter YAML (between the fences, exclusive of them). Each
    /// YAML line keeps its own line terminator; the string is empty when the
    /// closing fence immediately follows the opening one.
    pub frontmatter_yaml: String,
    /// The verbatim body: everything after the closing `---` line, starting
    /// just past that line's terminator. Empty when the closing fence is the
    /// last line of the file.
    pub body: String,
}
931
932/// Split a file's full text into its frontmatter block and body. The
933/// frontmatter block must be the very first thing in the file, delimited by
934/// `---` on its own line at start and end. Returns
935/// [`ParseError::MissingFrontmatter`] if absent.
936pub fn split_frontmatter(text: &str, file: &Path) -> Result<ParsedFile, ParseError> {
937    // Tolerate a single leading UTF-8 BOM (U+FEFF) before the opening fence,
938    // matching `store::frontmatter_block` and `index::extract_frontmatter_block`
939    // which already strip it. Without this, a BOM-prefixed file (common from
940    // Windows / exported markdown dropped into `sources/`) gets walked and
941    // indexed by `dbmd index` yet hard-fails every write/edit surface that
942    // routes through `read_file` (`fm get/set`, `format`, `link`, `write`). The
943    // BOM is dropped from the emitted body so the canonical writer never carries
944    // it forward.
945    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
946
947    // The opening fence must be the very first line: `---`, no leading
948    // whitespace, nothing before it. Trailing whitespace on the fence line is
949    // tolerated via `trim_end()` (which strips spaces/tabs as well as CR/LF) so
950    // this matches `index::extract_frontmatter_block` and
951    // `validate::split_frontmatter`, both of which use `trim_end()`. Without this
952    // agreement a fence written `--- ` (a single trailing space — invisible in an
953    // editor, easily produced by hand edits or exporters) was indexed and
954    // validated clean by those scanners yet hard-failed every write/edit surface
955    // routed through `read_file` (`fm get/set`, `format`, `link`, `write`) — the
956    // same cross-scanner drift class already fixed for the UTF-8 BOM above.
957    let mut lines = text.split_inclusive('\n');
958    let first = lines.next().unwrap_or("");
959    if first.trim_end() != "---" {
960        return Err(ParseError::MissingFrontmatter {
961            file: file.to_path_buf(),
962        });
963    }
964
965    // Scan for the closing fence line. Track byte offsets so we can slice the
966    // YAML (between fences, exclusive) and the body (verbatim, after the
967    // closing fence's line terminator).
968    let opening_len = first.len();
969    let mut offset = opening_len;
970    for line in lines {
971        if line.trim_end() == "---" {
972            let yaml = &text[opening_len..offset];
973            let body_start = offset + line.len();
974            let body = &text[body_start..];
975            return Ok(ParsedFile {
976                frontmatter_yaml: yaml.to_string(),
977                body: body.to_string(),
978            });
979        }
980        offset += line.len();
981    }
982
983    // Opening fence present but no closing fence: malformed frontmatter block.
984    Err(ParseError::MissingFrontmatter {
985        file: file.to_path_buf(),
986    })
987}
988
989/// Read a file from disk and parse it into typed [`Frontmatter`] plus the
990/// verbatim body string.
991pub fn read_file(path: &Path) -> Result<(Frontmatter, String), ParseError> {
992    let text = std::fs::read_to_string(path)?;
993    let parsed = split_frontmatter(&text, path)?;
994    let fm = Frontmatter::parse(&parsed.frontmatter_yaml, path)?;
995    Ok((fm, parsed.body))
996}
997
998/// Atomically write a markdown file from frontmatter + body: emit the
999/// frontmatter in canonical key order, then the body verbatim, via a
1000/// temp-file-rename so a reader never sees a half-written file. Preserves the
1001/// operator-edited body exactly as given.
1002pub fn write_file(path: &Path, frontmatter: &Frontmatter, body: &str) -> Result<(), ParseError> {
1003    let contents = render_file(frontmatter, body);
1004
1005    // One durable, atomic write for all primary data (see `crate::fsx`):
1006    // temp-file + fsync + rename + parent-fsync. Content records are primary
1007    // data, so they get the durable path (unlike the rebuildable index).
1008    crate::fsx::write_atomic(path, contents.as_bytes())?;
1009    Ok(())
1010}
1011
1012/// Atomically create a markdown file from frontmatter + body, refusing with
1013/// [`std::io::ErrorKind::AlreadyExists`] if the destination already exists.
1014///
1015/// This is the create-new sibling of [`write_file`]: same canonical rendering
1016/// and durable temp-file path, but backed by [`crate::fsx::write_atomic_new`] so
1017/// two concurrent creators for the same path cannot both succeed.
1018pub fn write_file_new(
1019    path: &Path,
1020    frontmatter: &Frontmatter,
1021    body: &str,
1022) -> Result<(), ParseError> {
1023    let contents = render_file(frontmatter, body);
1024    crate::fsx::write_atomic_new(path, contents.as_bytes())?;
1025    Ok(())
1026}
1027
1028fn render_file(frontmatter: &Frontmatter, body: &str) -> String {
1029    let yaml = frontmatter.to_yaml();
1030    // `to_yaml` already terminates each block with a newline. Compose the file
1031    // as: opening fence, frontmatter YAML, closing fence, then body verbatim.
1032    let mut contents = String::with_capacity(yaml.len() + body.len() + 8);
1033    contents.push_str("---\n");
1034    contents.push_str(&yaml);
1035    contents.push_str("---\n");
1036    contents.push_str(body);
1037    contents
1038}
1039
1040/// Extract every wiki-link from a body (and inline frontmatter), returning the
1041/// structured [`WikiLink`] stream with short-form / `.md`-extension flags and
1042/// `(file, line, col)` locations set.
1043pub fn extract_wiki_links(body: &str, file: &Path) -> Vec<WikiLink> {
1044    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
1045    let re = RE.get_or_init(|| {
1046        // [[target]] or [[target|display]]; target/display exclude brackets and
1047        // (for target) the `|` separator so nested forms don't over-match.
1048        regex::Regex::new(r"\[\[([^\[\]|]+?)(?:\|([^\[\]]*))?\]\]").expect("valid wiki-link regex")
1049    });
1050
1051    let mut out = Vec::new();
1052    for (line_idx, line) in body.lines().enumerate() {
1053        // Running (byte, char) cursor: derive each match's column in ONE linear
1054        // pass over the line instead of recomputing it from the line start per
1055        // match. `captures_iter` yields non-overlapping matches in increasing
1056        // byte order, so advancing the char count by the gap since the previous
1057        // match keeps the whole line O(line_len) rather than O(matches × len).
1058        let mut cursor = ColCursor::new();
1059        for caps in re.captures_iter(line) {
1060            let whole = caps.get(0).expect("group 0 always present");
1061            let col = cursor.column_at(line, whole.start());
1062            let target = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
1063            let display = caps.get(2).map(|m| m.as_str().to_string());
1064            out.push(WikiLink {
1065                is_full_path: target_is_full_path(&target),
1066                has_md_extension: target_has_md_extension(&target),
1067                target,
1068                display,
1069                location: (file.to_path_buf(), (line_idx as u32) + 1, col),
1070            });
1071        }
1072    }
1073    out
1074}
1075
1076/// Extract every standard markdown link `[text](url)` from a body into a
1077/// separate stream, kept distinct from wiki-links.
1078pub fn extract_markdown_links(body: &str, file: &Path) -> Vec<MarkdownLink> {
1079    static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
1080    let re = RE.get_or_init(|| {
1081        // [text](url). `text` excludes brackets so a wiki-link `[[x]]` (which
1082        // has `]]`, not `](`) never matches; `url` excludes `)` and whitespace.
1083        regex::Regex::new(r"\[([^\[\]]*)\]\(([^)\s]*)\)").expect("valid markdown-link regex")
1084    });
1085
1086    let mut out = Vec::new();
1087    for (line_idx, line) in body.lines().enumerate() {
1088        // One linear column cursor per line (see `extract_wiki_links`): avoids the
1089        // O(matches × line_len) recompute on a link-dense line.
1090        let mut cursor = ColCursor::new();
1091        for caps in re.captures_iter(line) {
1092            let whole = caps.get(0).expect("group 0 always present");
1093            let col = cursor.column_at(line, whole.start());
1094            out.push(MarkdownLink {
1095                text: caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(),
1096                url: caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(),
1097                location: (file.to_path_buf(), (line_idx as u32) + 1, col),
1098            });
1099        }
1100    }
1101    out
1102}
1103
1104/// Detect the frontmatter wiki-link-list mis-encoding: a wiki-link *list*
1105/// written so YAML parses it as nested sequences instead of a clean list of
1106/// strings. Returns the offending keys so validate can emit
1107/// `WIKI_LINK_FLOW_FORM_LIST`.
1108///
1109/// The subtlety is that `[[x]]` is YAML for "a list containing `[x]`", so the
1110/// shapes nest:
1111///
1112/// - **Scalar inline** `company: [[records/x]]` → `Seq[ Seq[String] ]`
1113///   (double-nested). This is the spec's scalar wiki-link form — NOT flagged.
1114/// - **Flow list** `attendees: [[[a]], [[b]]]` → `Seq[ Seq[Seq[String]], … ]`
1115///   (triple-nested). The list mis-encoding — flagged.
1116/// - **Unquoted block list** (`- [[a]]` per line) → also triple-nested, so it
1117///   is flagged too; the canonical list form must quote each item
1118///   (`- "[[a]]"`), which parses to a clean `Seq[String, …]` and is NOT flagged.
1119///
1120/// So the discriminator is nesting depth: a *list* mis-encoding has at least one
1121/// item that is itself a sequence-of-sequences, whereas a scalar inline link's
1122/// single item is a sequence-of-scalars.
1123pub fn detect_flow_form_link_lists(frontmatter_yaml: &str) -> Vec<String> {
1124    let value: Value = match serde_norway::from_str(frontmatter_yaml) {
1125        Ok(v) => v,
1126        // Malformed YAML is FM_MALFORMED_YAML's job, not ours; report nothing.
1127        Err(_) => return Vec::new(),
1128    };
1129    let Value::Mapping(map) = value else {
1130        return Vec::new();
1131    };
1132
1133    let mut out = Vec::new();
1134    for (k, v) in &map {
1135        if let Value::Sequence(items) = v {
1136            // Triple-nesting: some outer item is a sequence that itself holds a
1137            // sequence. Scalar inline `[[x]]` is only double-nested, so it
1138            // never matches.
1139            let is_link_list = items.iter().any(|item| match item {
1140                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
1141                _ => false,
1142            });
1143            if is_link_list {
1144                if let Some(key) = k.as_str() {
1145                    out.push(key.to_string());
1146                }
1147            }
1148        }
1149    }
1150    out
1151}
1152
1153/// Extract the `##`/`###` sections of a markdown body into a flat list with
1154/// body slices.
1155pub fn extract_sections(body: &str) -> Vec<Section> {
1156    // Keep each line's start so we can slice the body verbatim (exact newlines).
1157    let lines: Vec<&str> = body.split_inclusive('\n').collect();
1158
1159    // First pass: classify heading levels (0 = not a heading), honoring fenced
1160    // code blocks so a `## x` inside a ``` fence is not treated as a heading.
1161    let mut levels: Vec<u8> = Vec::with_capacity(lines.len());
1162    let mut fence: Option<(u8, usize)> = None;
1163    for line in &lines {
1164        let content = line.trim_end_matches(['\n', '\r']);
1165        if let Some(f) = fence {
1166            if is_closing_fence(content, f) {
1167                fence = None;
1168            }
1169            levels.push(0);
1170            continue;
1171        }
1172        if let Some(opened) = opening_fence(content) {
1173            fence = Some(opened);
1174            levels.push(0);
1175            continue;
1176        }
1177        levels.push(heading_level(content));
1178    }
1179
1180    // Second pass: emit `##`+ headings; each section body runs from its heading
1181    // line to the next heading at an equal-or-shallower level (exclusive).
1182    let mut sections = Vec::new();
1183    for (i, &lvl) in levels.iter().enumerate() {
1184        if lvl < 2 {
1185            continue;
1186        }
1187        let heading_line = lines[i].trim_end_matches(['\n', '\r']);
1188        let heading = heading_text(heading_line, lvl);
1189
1190        let mut end = lines.len();
1191        for (j, &other) in levels.iter().enumerate().skip(i + 1) {
1192            if other != 0 && other <= lvl {
1193                end = j;
1194                break;
1195            }
1196        }
1197
1198        sections.push(Section {
1199            heading,
1200            level: lvl,
1201            line: (i + 1) as u32,
1202            body: lines[i..end].concat(),
1203        });
1204    }
1205    sections
1206}
1207
1208/// Extract the `##`/`###` sections of a **whole file** (frontmatter + body),
1209/// returning each [`Section`] with `line` numbered against the *source file*,
1210/// not the body.
1211///
1212/// [`extract_sections`] numbers headings 1-based within the body it is handed —
1213/// the right frame for callers that already track the frontmatter offset
1214/// (`validate` adds `fm_end_line`). But the single-file views (`dbmd sections`,
1215/// `dbmd outline`) present `Section::line` as a source line an agent can jump to;
1216/// because every db.md file opens with a frontmatter block, the body-relative
1217/// number is off by the block's length (`opening fence + frontmatter lines +
1218/// closing fence`) for every file. This helper does the offset once, in the
1219/// parser, so those surfaces report true file lines. A file with no leading
1220/// frontmatter block is treated as all-body (offset 0), so the function never
1221/// fails just because a file lacks frontmatter.
1222pub fn extract_sections_in_file(text: &str) -> Vec<Section> {
1223    // Tolerate a leading BOM the same way `split_frontmatter` does, so the line
1224    // count and the body slice agree with the read path.
1225    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
1226
1227    // Find the body and how many source lines precede it. The body begins right
1228    // after the closing fence; the number of lines consumed by the frontmatter
1229    // block (both fences + the YAML between) is the offset to add to each
1230    // body-relative heading line.
1231    let (body, offset) = match split_frontmatter(text, Path::new("<sections>")) {
1232        Ok(parsed) => {
1233            // Lines before the body = total lines in `text` minus lines in body.
1234            let total_lines = count_lines(text);
1235            let body_lines = count_lines(&parsed.body);
1236            (parsed.body, total_lines.saturating_sub(body_lines))
1237        }
1238        // No frontmatter block: the whole text is body, no offset.
1239        Err(_) => (text.to_string(), 0),
1240    };
1241
1242    let mut sections = extract_sections(&body);
1243    for s in &mut sections {
1244        s.line += offset;
1245    }
1246    sections
1247}
1248
/// Number of lines `s` spans, for line-number offsetting.
///
/// Every `\n`-terminated line counts once, and a final line without a trailing
/// newline counts too. The empty string spans zero lines.
fn count_lines(s: &str) -> u32 {
    // `split_inclusive` yields one piece per terminated line plus one for an
    // unterminated tail, and nothing at all for the empty string.
    s.split_inclusive('\n').count() as u32
}
1263
/// Parse a store's `DB.md` file into a [`Config`]: the `## Agent instructions`
/// prose, `## Folders` bullets, `## Policies` (`### Frozen pages` +
/// `### Ignored types`), and `## Schemas` (`### <type>` field-bullet blocks).
/// Unrecognized sections are ignored; absent sections leave their [`Config`]
/// fields at default.
///
/// Section names are matched after trimming and ASCII-lowercasing, so heading
/// case does not matter. Schema type names are the exception: they are taken
/// from the `### <type>` heading as written (trimmed only).
///
/// Repeated sections do not merge: a later `### Frozen pages` or
/// `### Ignored types` replaces the earlier list, and a later `### <type>`
/// with the same name replaces the earlier schema. A repeated `## Folders`
/// bullet for the same path overwrites the earlier entry.
///
/// # Errors
///
/// Propagates [`split_frontmatter`]'s error when the file does not start with
/// a closed `---` block, and any error from `Frontmatter::parse` on the YAML.
pub fn parse_db_md(text: &str, file: &Path) -> Result<Config, ParseError> {
    // The structured sections live in the body (after frontmatter). DB.md must
    // still start with a valid `---` block (`type: db-md`); if it's missing we
    // surface MissingFrontmatter like any other file.
    let parsed = split_frontmatter(text, file)?;
    // Parsed only so that invalid frontmatter is reported; the value itself is
    // not used for building the config.
    let _frontmatter = Frontmatter::parse(&parsed.frontmatter_yaml, file)?;
    let sections = extract_sections(&parsed.body);

    let mut config = Config::default();
    // Track which H2 region each H3 belongs to as we walk the flat list.
    let mut current_h2: Option<String> = None;

    for section in &sections {
        match section.level {
            2 => {
                let name = section.heading.trim().to_ascii_lowercase();
                current_h2 = Some(name.clone());
                if name == "agent instructions" {
                    // Empty prose is treated the same as an absent section.
                    let prose = section_prose(&section.body);
                    if !prose.is_empty() {
                        config.agent_instructions = Some(prose);
                    }
                } else if name == "folders" {
                    // `## Folders` carries its bullets directly under the H2 (no
                    // `### <type>` sub-sections), like `## Agent instructions`.
                    // Bullets that do not parse as a folder entry are skipped.
                    for b in bullet_lines(&section.body) {
                        if let Some((path, meta)) = parse_folder_bullet(&b) {
                            config.folders.insert(path, meta);
                        }
                    }
                }
            }
            3 => {
                // An H3 seen before any H2 has no parent region and matches
                // none of the arms below.
                let h2 = current_h2.as_deref().unwrap_or("");
                let h3 = section.heading.trim().to_ascii_lowercase();
                match (h2, h3.as_str()) {
                    ("policies", "frozen pages") => {
                        // Assignment, not extend: a later block replaces an
                        // earlier one.
                        config.frozen_pages = bullet_lines(&section.body)
                            .into_iter()
                            .map(|b| PathBuf::from(extract_path_bullet(&b)))
                            .collect();
                    }
                    ("policies", "ignored types") => {
                        // One bullet may contribute several type names.
                        config.ignored_types = bullet_lines(&section.body)
                            .into_iter()
                            .flat_map(|b| extract_type_list_bullet(&b))
                            .collect();
                    }
                    ("schemas", _) => {
                        // The H3 heading text (as written) is the type name.
                        let type_name = section.heading.trim().to_string();
                        let mut schema = Schema::default();
                        for b in bullet_lines(&section.body) {
                            // Arm order matters: the guarded arms take the
                            // non-empty directives, and the catch-all arm at
                            // the end absorbs whatever the guards rejected.
                            match parse_schema_bullet(&b) {
                                SchemaBullet::Field(f) => schema.fields.push(f),
                                SchemaBullet::Unique(k) if !k.is_empty() => {
                                    schema.unique_keys.push(k)
                                }
                                SchemaBullet::SummaryTemplate(t) if !t.is_empty() => {
                                    schema.summary_template = Some(t)
                                }
                                SchemaBullet::Shard(Some(b)) => schema.shard = Some(b),
                                // Empty `unique:`/`summary_template:`, or a `shard:`
                                // with an unrecognized value — ignored.
                                SchemaBullet::Unique(_)
                                | SchemaBullet::SummaryTemplate(_)
                                | SchemaBullet::Shard(None) => {}
                            }
                        }
                        config.schemas.insert(type_name, schema);
                    }
                    _ => {}
                }
            }
            // Headings deeper than H3 carry no config of their own.
            _ => {}
        }
    }

    Ok(config)
}
1348
1349/// One parsed bullet inside a `### <type>` schema block: an ordinary field, or a
1350/// reserved directive (`unique:` / `summary_template:` / `shard:`). The names
1351/// `unique`, `summary_template`, and `shard` are reserved and cannot be used as
1352/// field names.
1353#[derive(Debug)]
1354enum SchemaBullet {
1355    /// An ordinary `- <name> (<modifiers>)` field.
1356    Field(FieldSpec),
1357    /// `- unique: <field>[, <field> …]` — a (possibly compound) uniqueness key.
1358    Unique(Vec<String>),
1359    /// `- summary_template: <template>` — the default-`summary` pattern.
1360    SummaryTemplate(String),
1361    /// `- shard: by-date | flat` — date-shard records of this type, or keep them
1362    /// flat. `None` = an unrecognized value, ignored like an unknown modifier.
1363    Shard(Option<bool>),
1364}
1365
1366/// Classify one `## Schemas` bullet as a directive or a field. The directive
1367/// forms are `- unique: a, b, …` and `- summary_template: …`; the keyword check
1368/// guards against false positives — a field like `- status (enum: a, b)` has a
1369/// `(` before any `:`, so its head isn't a bare reserved keyword and it parses
1370/// as a [`FieldSpec`].
1371fn parse_schema_bullet(bullet_line: &str) -> SchemaBullet {
1372    let line = bullet_line.trim();
1373    let line = line
1374        .strip_prefix("- ")
1375        .or_else(|| line.strip_prefix("* "))
1376        .or_else(|| line.strip_prefix("+ "))
1377        .or_else(|| line.strip_prefix('-'))
1378        .unwrap_or(line)
1379        .trim();
1380
1381    if let Some((head, rest)) = line.split_once(':') {
1382        match head.trim().to_ascii_lowercase().as_str() {
1383            "unique" => {
1384                let fields = rest
1385                    .split(',')
1386                    .map(|f| f.trim().to_string())
1387                    .filter(|f| !f.is_empty())
1388                    .collect();
1389                return SchemaBullet::Unique(fields);
1390            }
1391            "summary_template" => {
1392                return SchemaBullet::SummaryTemplate(rest.trim().to_string());
1393            }
1394            "shard" => {
1395                // `by-date` (synonyms: date/sharded/true) enables date-sharding;
1396                // `flat` (none/false) forces flat; anything else is ignored.
1397                let v = match rest.trim().to_ascii_lowercase().as_str() {
1398                    "by-date" | "date" | "sharded" | "true" => Some(true),
1399                    "flat" | "none" | "false" => Some(false),
1400                    _ => None,
1401                };
1402                return SchemaBullet::Shard(v);
1403            }
1404            _ => {}
1405        }
1406    }
1407
1408    SchemaBullet::Field(parse_field_spec(bullet_line))
1409}
1410
1411/// Parse one `## Folders` bullet — `- <path>[|<display>] — <description>` — into
1412/// the folder path (store-relative, unix-slash, no trailing slash) and its
1413/// [`FolderMeta`]. The optional `|<display>` overrides the rollup's derived
1414/// folder name (mirroring the wiki-link `|display` convention); the text after
1415/// the first em-dash (`—`), or ` - `, is the description. Backticks around the
1416/// path are tolerated (matching the `### Frozen pages` spelling). Returns `None`
1417/// for a bullet with no usable path.
1418fn parse_folder_bullet(bullet_line: &str) -> Option<(String, FolderMeta)> {
1419    let line = bullet_line.trim();
1420    let line = line
1421        .strip_prefix("- ")
1422        .or_else(|| line.strip_prefix("* "))
1423        .or_else(|| line.strip_prefix("+ "))
1424        .or_else(|| line.strip_prefix('-'))
1425        .unwrap_or(line)
1426        .trim();
1427
1428    // Split off the description at the first em-dash (preferred, matching the
1429    // rollup's own ` — ` separator) or a ` - ` fallback.
1430    let (pathspec, description) = match line.find('—') {
1431        Some(i) => (line[..i].trim(), Some(line[i + '—'.len_utf8()..].trim())),
1432        None => match line.find(" - ") {
1433            Some(i) => (line[..i].trim(), Some(line[i + 3..].trim())),
1434            None => (line, None),
1435        },
1436    };
1437
1438    // Optional `|display` override lives on the path side.
1439    let (path_raw, display) = match pathspec.split_once('|') {
1440        Some((p, d)) => (p.trim(), Some(d.trim())),
1441        None => (pathspec, None),
1442    };
1443
1444    // Normalize the path: drop surrounding backticks, a leading `./`, a trailing `/`.
1445    let path = path_raw.trim().trim_matches('`').trim();
1446    let path = path.strip_prefix("./").unwrap_or(path);
1447    let path = path.strip_suffix('/').unwrap_or(path).trim();
1448    if path.is_empty() {
1449        return None;
1450    }
1451
1452    let non_empty = |s: &str| {
1453        let t = s.trim();
1454        (!t.is_empty()).then(|| t.to_string())
1455    };
1456    Some((
1457        path.to_string(),
1458        FolderMeta {
1459            display: display.and_then(non_empty),
1460            description: description.and_then(non_empty),
1461        },
1462    ))
1463}
1464
1465/// Parse a single `## Schemas` field-bullet line — `- <name> (<modifiers>)` —
1466/// into a [`FieldSpec`], capturing recognized modifiers and stashing the rest
1467/// in [`FieldSpec::unknown_modifiers`].
1468pub fn parse_field_spec(bullet_line: &str) -> FieldSpec {
1469    // Strip the leading bullet marker (`- ` / `* ` / `+ `) and surrounding ws.
1470    let line = bullet_line.trim();
1471    let line = line
1472        .strip_prefix("- ")
1473        .or_else(|| line.strip_prefix("* "))
1474        .or_else(|| line.strip_prefix("+ "))
1475        .or_else(|| line.strip_prefix('-'))
1476        .unwrap_or(line)
1477        .trim();
1478
1479    // Split `<name> (<modifiers>)` — the canonical paren form — OR the natural
1480    // mis-spelling `<name>: <modifiers>` (colon instead of parens). The two
1481    // delimiters are interchangeable for the field head; whichever appears FIRST
1482    // wins, so a paren form whose modifiers contain a colon (`status (enum: a,
1483    // b)`) still parses by parens (the `(` precedes the `:`), while a bare
1484    // `title: string, required` parses by colon instead of being swallowed whole
1485    // into the field name with every modifier silently dropped.
1486    let paren = line.find('(');
1487    let colon = line.find(':');
1488    // Choose the head delimiter. The paren form wins when its `(` precedes any
1489    // `:` (so `status (enum: a, b)` parses by parens, the colon being inside the
1490    // modifiers); otherwise a `:` before the paren — or with no paren at all —
1491    // selects the colon form `<name>: <modifiers>`, the natural mis-spelling that
1492    // must NOT be swallowed whole into the field name with every modifier lost.
1493    let use_paren = matches!((paren, colon), (Some(p), c) if c.is_none_or(|c| p < c));
1494    let (name, modifiers) = if use_paren {
1495        let open = paren.expect("use_paren implies a paren");
1496        let name = line[..open].trim().to_string();
1497        let after = &line[open + 1..];
1498        let mods = match after.rfind(')') {
1499            Some(close) => &after[..close],
1500            None => after, // tolerate a missing close paren
1501        };
1502        (name, mods.trim())
1503    } else if let Some(c) = colon {
1504        // Colon form: everything after the first colon is the modifier list,
1505        // parsed identically to the parenthesized modifiers below.
1506        let name = line[..c].trim().to_string();
1507        (name, line[c + 1..].trim())
1508    } else {
1509        // Neither delimiter: a free-form optional field of any shape — name only.
1510        (line.to_string(), "")
1511    };
1512
1513    let mut spec = FieldSpec {
1514        name,
1515        ..FieldSpec::default()
1516    };
1517
1518    if modifiers.is_empty() {
1519        return spec;
1520    }
1521
1522    // Modifiers are comma-separated. `enum` and `default` are special: their own
1523    // values may contain commas, so each is a *greedy* clause that runs from its
1524    // keyword to the start of the next recognized greedy clause (or end of line).
1525    // This lets `default North America, EMEA fallback` keep its comma and lets a
1526    // `default …` written after an `enum …` still be recognized, instead of the
1527    // value being truncated at the first comma or absorbed into the enum list.
1528    let raw: Vec<&str> = modifiers.split(',').collect();
1529    let mut i = 0;
1530    while i < raw.len() {
1531        let token = raw[i].trim();
1532        if token.is_empty() {
1533            i += 1;
1534            continue;
1535        }
1536        let lower = token.to_ascii_lowercase();
1537
1538        if lower == "required" {
1539            spec.required = true;
1540            i += 1;
1541        } else if let Some(shape) = shape_from_str(&lower) {
1542            spec.shape = Some(shape);
1543            i += 1;
1544        } else if let Some(rest) = lower.strip_prefix("link to ") {
1545            // The trailing slash is required in the source; store the prefix
1546            // without it so `Path::starts_with` comparisons are clean.
1547            let prefix = token["link to ".len()..].trim().trim_end_matches('/');
1548            let _ = rest; // lowercase form only used for the keyword match
1549            spec.link_prefix = Some(PathBuf::from(prefix));
1550            i += 1;
1551        } else if token.len() >= "default ".len() && lower.starts_with("default ") {
1552            // Greedy `default <value>`: the value is this token (after the
1553            // keyword) plus every following comma-token up to the next greedy
1554            // clause, rejoined with the commas the split removed — so a comma
1555            // inside the default value is preserved. Original case is kept.
1556            let end = next_greedy_clause(&raw, i + 1);
1557            let mut value = token["default ".len()..].to_string();
1558            for tok in &raw[i + 1..end] {
1559                value.push(',');
1560                value.push_str(tok);
1561            }
1562            spec.default = Some(Value::String(value.trim().to_string()));
1563            i = end;
1564        } else if lower == "enum" || lower.starts_with("enum:") {
1565            // Greedy `enum` (bare `enum, a, b` or `enum: a, b`): the values run
1566            // from here to the next greedy clause (e.g. a trailing `default …`),
1567            // NOT unconditionally to end-of-line — so a `default` after `enum` is
1568            // parsed instead of swallowed as a bogus enum member.
1569            let end = next_greedy_clause(&raw, i + 1);
1570            // Rejoin this clause's tokens (trimmed so the `enum` head sits at the
1571            // start), drop the leading `enum`/`enum:` head, then re-split the
1572            // remainder into values.
1573            let joined = raw[i..end].join(",");
1574            let joined = joined.trim();
1575            let after_kw = match joined.find(':') {
1576                // `enum: a, b` — values follow the colon.
1577                Some(colon) => &joined[colon + 1..],
1578                // bare `enum, a, b` — values follow the keyword itself.
1579                None => joined.get("enum".len()..).unwrap_or(""),
1580            };
1581            let values: Vec<String> = after_kw
1582                .split(',')
1583                .map(|v| v.trim().to_string())
1584                .filter(|v| !v.is_empty())
1585                .collect();
1586            spec.enum_values = Some(values);
1587            i = end;
1588        } else {
1589            // Unrecognized modifier — captured verbatim, surfaced as Info.
1590            spec.unknown_modifiers.push(token.to_string());
1591            i += 1;
1592        }
1593    }
1594
1595    spec
1596}
1597
1598// ── Private helpers ─────────────────────────────────────────────────────────
1599
1600/// Parse a frontmatter timestamp value into a `DateTime<FixedOffset>`. A `null`
1601/// is treated as absent; anything else must be an RFC3339 string.
1602fn parse_timestamp(
1603    value: &Value,
1604    key: &str,
1605    file: &Path,
1606) -> Result<Option<DateTime<FixedOffset>>, ParseError> {
1607    match value {
1608        Value::Null => Ok(None),
1609        Value::String(s) => parse_rfc3339(s, key, file).map(Some),
1610        other => Err(ParseError::BadTimestamp {
1611            file: file.to_path_buf(),
1612            key: key.to_string(),
1613            value: format!("{other:?}"),
1614        }),
1615    }
1616}
1617
1618/// Parse an RFC3339 timestamp string, mapping failure to [`ParseError::BadTimestamp`].
1619fn parse_rfc3339(s: &str, key: &str, file: &Path) -> Result<DateTime<FixedOffset>, ParseError> {
1620    DateTime::parse_from_rfc3339(s.trim()).map_err(|_| ParseError::BadTimestamp {
1621        file: file.to_path_buf(),
1622        key: key.to_string(),
1623        value: s.to_string(),
1624    })
1625}
1626
1627/// Coerce a YAML scalar value to its string form for the universal-contract
1628/// fields (`type`/`id`/`summary`/`status`). Mirrors `validate::scalar_string`
1629/// and `store::yaml_scalar_string` so the four modules agree on one coercion
1630/// rule: a bare numeric/bool scalar (`id: 100`, `summary: 2026`, `status: 0`)
1631/// is preserved as its string form rather than being read as None and silently
1632/// dropped on the next `to_yaml` re-emit. Returns `None` only for genuinely
1633/// non-scalar values (sequences, mappings, null), which were never a valid
1634/// shape for these fields.
1635fn scalar_string(value: &Value) -> Option<String> {
1636    match value {
1637        Value::String(s) => Some(s.clone()),
1638        Value::Number(n) => Some(n.to_string()),
1639        Value::Bool(b) => Some(b.to_string()),
1640        _ => None,
1641    }
1642}
1643
1644/// Read a `tags` value into a flat `Vec<String>`. Accepts a sequence of scalars
1645/// (the canonical form) or a single scalar (coerced to a one-element list).
1646fn parse_tags(value: &Value) -> Vec<String> {
1647    match value {
1648        Value::Sequence(items) => items
1649            .iter()
1650            .filter_map(|v| match v {
1651                Value::String(s) => Some(s.clone()),
1652                Value::Number(n) => Some(n.to_string()),
1653                Value::Bool(b) => Some(b.to_string()),
1654                _ => None,
1655            })
1656            .collect(),
1657        Value::String(s) => vec![s.clone()],
1658        _ => Vec::new(),
1659    }
1660}
1661
1662/// Read a `tags` value into a flat `Vec<String>` **without losing data**: a
1663/// sequence of clean scalars (the canonical form) or a single scalar coerce to a
1664/// string list. Any other shape — a sequence with a non-scalar item
1665/// (`tags: [[vip]]` → `Seq[Seq[String]]`, `tags: [a, [b]]`), or a mapping — is
1666/// rejected as `Err(value.clone())` so the caller preserves the raw value in
1667/// `extra` rather than silently filtering items out / erasing the field on the
1668/// next re-emit. This is the `tags` analog of routing a non-scalar universal
1669/// value to pass-through instead of the destroy path.
1670fn parse_tags_preserving(value: &Value) -> Result<Vec<String>, Value> {
1671    match value {
1672        Value::Sequence(items) => {
1673            let mut out = Vec::with_capacity(items.len());
1674            for item in items {
1675                match item {
1676                    Value::String(s) => out.push(s.clone()),
1677                    Value::Number(n) => out.push(n.to_string()),
1678                    Value::Bool(b) => out.push(b.to_string()),
1679                    // A non-scalar item (nested sequence/mapping/null) means this
1680                    // is not a clean tag list; preserve the whole value verbatim.
1681                    _ => return Err(value.clone()),
1682                }
1683            }
1684            Ok(out)
1685        }
1686        Value::String(s) => Ok(vec![s.clone()]),
1687        Value::Number(n) => Ok(vec![n.to_string()]),
1688        Value::Bool(b) => Ok(vec![b.to_string()]),
1689        // A mapping / null `tags` value is not a list; preserve it verbatim.
1690        _ => Err(value.clone()),
1691    }
1692}
1693
1694/// Render a non-string YAML mapping key as the scalar text YAML would emit for
1695/// it (`2026`, `true`, `3.14`, …), so a numeric/bool/float frontmatter key
1696/// preserves its key *text* on round-trip instead of being rewritten to its Rust
1697/// `Debug` form (`Number(2026)`, `Bool(true)`, `'Null'`). The key re-emits as a
1698/// string-typed key carrying the original text (`'2026':`) — the type narrows to
1699/// string, but the operator's data is no longer corrupted, and ordinary string
1700/// keys are wholly unaffected. Falls back to `Debug` only for a key shape that
1701/// cannot be a scalar (a sequence/mapping key — not expressible in our
1702/// `String`-keyed `extra`), which never occurs in practice.
1703fn yaml_scalar_key(key: &Value) -> String {
1704    match key {
1705        Value::String(s) => s.clone(),
1706        Value::Number(n) => n.to_string(),
1707        Value::Bool(b) => b.to_string(),
1708        Value::Null => "null".to_string(),
1709        // Non-scalar key: not representable as a plain `extra` string key; keep
1710        // the defensive Debug form so nothing panics (unreachable in practice).
1711        other => format!("{other:?}"),
1712    }
1713}
1714
1715/// Parse a single `[[target|display]]` string into a [`WikiLink`] with no
1716/// location, or `None` if the string is not a bare wiki-link. Used for
1717/// frontmatter-valued links where there is no body position to report.
1718fn parse_wiki_link_str(s: &str) -> Option<WikiLink> {
1719    let s = s.trim();
1720    let inner = s.strip_prefix("[[")?.strip_suffix("]]")?;
1721    // Reject anything with further brackets (e.g. the nested flow-form item),
1722    // which is not a clean single wiki-link.
1723    if inner.contains('[') || inner.contains(']') {
1724        return None;
1725    }
1726    let (target, display) = match inner.split_once('|') {
1727        Some((t, d)) => (t.to_string(), Some(d.to_string())),
1728        None => (inner.to_string(), None),
1729    };
1730    Some(WikiLink {
1731        is_full_path: target_is_full_path(&target),
1732        has_md_extension: target_has_md_extension(&target),
1733        target,
1734        display,
1735        location: (PathBuf::new(), 0, 0),
1736    })
1737}
1738
1739/// Extract every wiki-link from a single frontmatter field value, accepting the
1740/// two canonical forms the spec defines (SPEC § Linking):
1741///
1742/// - a **scalar** wiki-link field, in either the quoted (`f: "[[x]]"`) or the
1743///   canonical unquoted inline (`f: [[x]]`) form, and
1744/// - a **list** field whose items are quoted wiki-link strings
1745///   (`- "[[x]]"`).
1746///
1747/// YAML eats the brackets of an unquoted `[[x]]`, leaving a flow-list-in-a-list,
1748/// so the parsed [`Value`] shapes are not what one would naively expect:
1749///
1750/// | source                         | parsed `Value`                     | here |
1751/// |--------------------------------|------------------------------------|------|
1752/// | `f: "[[x]]"`       (quoted)    | `String("[[x]]")`                  | link |
1753/// | `f: [[x]]`         (unquoted)  | `Seq[ Seq[String("x")] ]`          | link |
1754/// | `f:`\n`  - "[[x]]"`(quoted)    | `Seq[ String("[[x]]"), … ]`        | link |
1755/// | `f:`\n`  - [[x]]`  (unquoted)  | `Seq[ Seq[Seq[String("x")]], … ]`  | —    |
1756///
1757/// The last row — an *unquoted list* — parses identically to the flow-form list
1758/// `f: [[a], [b]]` and is a mis-encoding the canonical writer never emits;
1759/// `dbmd validate` reports it as `WIKI_LINK_FLOW_FORM_LIST` (see
1760/// [`detect_flow_form_link_lists`]). It is deliberately NOT surfaced here, so an
1761/// edge enumerator only ever sees the valid canonical forms.
1762///
1763/// The unquoted scalar (`Seq[Seq[String]]`, one element) is told apart from a
1764/// plain one-item flow list (`f: [x]` → `Seq[String]`, one fewer nesting level)
1765/// by [`unquoted_inline_link`] requiring its argument to be a `Sequence`.
1766fn links_in_field_value(value: &Value) -> Vec<WikiLink> {
1767    // Quoted scalar: `field: "[[x]]"`.
1768    if let Value::String(s) = value {
1769        return parse_wiki_link_str(s).into_iter().collect();
1770    }
1771    let Value::Sequence(items) = value else {
1772        return Vec::new();
1773    };
1774    // Unquoted scalar inline form `field: [[x]]` → `Seq[ Seq[String(x)] ]`.
1775    // (A quoted single-item list `["[[x]]"]` is `Seq[String]`, so its lone item
1776    // is a `String`, not a `Sequence`, and falls through to the list path below.)
1777    if items.len() == 1 {
1778        if let Some(link) = unquoted_inline_link(&items[0]) {
1779            return vec![link];
1780        }
1781    }
1782    // Otherwise a list of quoted wiki-link strings; non-string items (the
1783    // unquoted-list mis-encoding) are left for validate to flag.
1784    items
1785        .iter()
1786        .filter_map(|item| parse_wiki_link_str(item.as_str()?))
1787        .collect()
1788}
1789
1790/// Canonicalize one `extra` frontmatter value for emission by [`Frontmatter::to_yaml`].
1791///
1792/// The read path ([`Frontmatter::parse`]) stores every unknown key's raw parsed
1793/// [`Value`] verbatim, so a SPEC-canonical *unquoted* inline scalar wiki-link
1794/// (`company: [[records/companies/northstar]]`) lands in `extra` as the nested
1795/// shape YAML produces for it — `Seq[ Seq[String("records/companies/northstar")] ]`.
1796/// Re-emitting that verbatim yields the block sequence
1797///
1798/// ```text
1799/// company:
1800/// - - records/companies/northstar
1801/// ```
1802///
1803/// which has lost the `[[ ]]` brackets entirely: the link is destroyed, and every
1804/// reader (validate, graph, backlinks) stops seeing the edge. This normalizes such
1805/// a value back into the canonical emitted form before it is written:
1806///
1807/// - a **scalar** wiki-link (quoted `String("[[x]]")` or unquoted `Seq[Seq[String]]`,
1808///   one element) → a quoted scalar `Value::String("[[x]]")`, which serde_norway emits
1809///   inline as `'[[x]]'` — the form the finding confirms survives a round-trip and
1810///   that [`links_in_field_value`] reads back as the same scalar link;
1811/// - a **list** of wiki-links (in any spelling [`links_in_field_value`] accepts) →
1812///   a block `Value::Sequence` of quoted-link strings (`- "[[x]]"`), matching the
1813///   `set` write-in path and the canonical list form;
1814/// - everything else → returned verbatim (the common no-op for non-link values).
1815///
1816/// `|display` is preserved in both link branches. This is the single point that
1817/// keeps all three curator-loop writers (`format`, `fm set`, `link`) from
1818/// corrupting a pre-existing canonical link, since they all funnel through
1819/// `to_yaml`.
1820fn canonicalize_extra_value(value: &Value) -> Value {
1821    match value {
1822        // Scalar wiki-link, quoted form: `field: "[[x]]"` → `String("[[x]]")`.
1823        // Re-emit as a quoted scalar so it stays a string (never the brackets-as-
1824        // YAML nested sequence). Non-link strings are returned untouched.
1825        Value::String(s) => match parse_wiki_link_str(s) {
1826            Some(link) => Value::String(wiki_link_literal(&link)),
1827            None => value.clone(),
1828        },
1829        Value::Sequence(items) => {
1830            // Scalar wiki-link, unquoted inline form: `field: [[x]]` parses to a
1831            // one-element `Seq[ Seq[String(x)] ]`. Collapse back to the quoted
1832            // scalar string so the link is preserved rather than block-emitted.
1833            if items.len() == 1 {
1834                if let Some(link) = unquoted_inline_link(&items[0]) {
1835                    return Value::String(wiki_link_literal(&link));
1836                }
1837            }
1838            // List of wiki-links: re-emit as a block sequence of quoted-link
1839            // strings, the canonical list form `to_yaml` renders block-style and
1840            // `links_in_field_value` accepts. Only canonicalize when *every* item
1841            // is a clean single wiki-link; a list with any non-link item is left
1842            // verbatim so unrelated sequences (and the unquoted-list mis-encoding
1843            // validate flags) are untouched.
1844            let mut links = Vec::with_capacity(items.len());
1845            for item in items {
1846                match link_from_flow_list_item(item) {
1847                    Some(link) => links.push(link),
1848                    None => return value.clone(),
1849                }
1850            }
1851            if links.is_empty() {
1852                return value.clone();
1853            }
1854            Value::Sequence(
1855                links
1856                    .iter()
1857                    .map(|l| Value::String(wiki_link_literal(l)))
1858                    .collect(),
1859            )
1860        }
1861        // Mappings, scalars other than strings, nulls: nothing to canonicalize.
1862        _ => value.clone(),
1863    }
1864}
1865
1866/// Render a [`WikiLink`] back to its `[[target]]` / `[[target|display]]` literal,
1867/// the inner form the canonical writer emits and `links_in_field_value` accepts.
1868fn wiki_link_literal(link: &WikiLink) -> String {
1869    match &link.display {
1870        Some(d) => format!("[[{}|{}]]", link.target, d),
1871        None => format!("[[{}]]", link.target),
1872    }
1873}
1874
1875/// Recognize the inner token of an unquoted scalar `[[x]]`: after YAML strips the
1876/// outer brackets, the inner `[x]` is a single-element sequence `Seq[String(x)]`.
1877/// Reconstructs `[[x]]` (preserving any `|display`) and parses it, or returns
1878/// `None` when `v` is not that shape. Requiring a `Sequence` here is what keeps a
1879/// plain one-item flow list (`field: [x]` → `Seq[String]`, not `Seq[Seq[String]]`)
1880/// from being mistaken for a wiki-link.
1881fn unquoted_inline_link(v: &Value) -> Option<WikiLink> {
1882    let Value::Sequence(items) = v else {
1883        return None;
1884    };
1885    if items.len() != 1 {
1886        return None;
1887    }
1888    let s = items[0].as_str()?;
1889    // A clean unquoted wiki-link has no further brackets inside it.
1890    if s.contains('[') || s.contains(']') {
1891        return None;
1892    }
1893    parse_wiki_link_str(&format!("[[{s}]]"))
1894}
1895
1896/// Decide whether a `dbmd fm set` / `--fm` value string is a **list of
1897/// wiki-links** that should be stored as a YAML block sequence, returning the
1898/// canonical `Value::Sequence` of quoted-link strings when so.
1899///
1900/// The value path of every write surface stringifies its argument; without this
1901/// a required list-of-links field (`meeting.attendees`) was unwritable in valid
1902/// form — passing `[[[a]], [[b]]]` stored a single scalar string that mis-parses
1903/// and trips `WIKI_LINK_FLOW_FORM_LIST` / `WIKI_LINK_BROKEN`. This recognizes the
1904/// two list spellings an agent naturally types and normalizes both to the block
1905/// form the canonical writer emits and `dbmd validate` accepts:
1906///
1907/// - flow list of quoted links — `["[[a]]", "[[b]]"]`
1908/// - flow list of unquoted links — `[[[a]], [[b]]]` (YAML: `Seq[Seq[String], …]`)
1909///
1910/// Returns `None` (⇒ caller stores a verbatim scalar string) for everything that
1911/// is not unambiguously a list of clean wiki-links — plain text, a single inline
1912/// `[[x]]` (YAML reads it as a one-item `Seq[Seq[String]]`, kept scalar so it
1913/// renders inline), an empty list, or a list with any non-link item. A single
1914/// link must stay scalar; only genuine multi-item-or-explicit lists become
1915/// sequences, matching `links_in_field_value`'s acceptance rule so writer and
1916/// validator never disagree.
1917fn parse_link_list_value(value: &str) -> Option<Value> {
1918    let trimmed = value.trim();
1919    // Only a YAML *flow sequence* literal is a list candidate; anything not
1920    // wrapped in `[ … ]` is a scalar (a bare `[[x]]` is wrapped, and handled by
1921    // the single-inline-link guard below).
1922    if !(trimmed.starts_with('[') && trimmed.ends_with(']')) {
1923        return None;
1924    }
1925    let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(trimmed) else {
1926        return None;
1927    };
1928    // A single inline `[[x]]` parses to `Seq[ Seq[String(x)] ]` (one item, itself
1929    // a sequence) — that is the unquoted *scalar* form, not a list. Keep it scalar
1930    // so it round-trips to the inline `field: [[x]]` rather than a one-item block
1931    // list. `links_in_field_value` reads it back as a scalar link either way.
1932    if items.len() == 1 && unquoted_inline_link(&items[0]).is_some() {
1933        return None;
1934    }
1935    // Every item must resolve to exactly one clean wiki-link, in any of the flow
1936    // spellings an agent types (see [`link_from_flow_list_item`]).
1937    let mut links = Vec::with_capacity(items.len());
1938    for item in &items {
1939        links.push(link_from_flow_list_item(item)?);
1940    }
1941    if links.is_empty() {
1942        return None;
1943    }
1944    // Normalize to a block sequence of quoted-link strings — the form `to_yaml`
1945    // renders block-style and `links_in_field_value` accepts. `|display` is
1946    // preserved.
1947    let normalized = links
1948        .iter()
1949        .map(|l| Value::String(wiki_link_literal(l)))
1950        .collect();
1951    Some(Value::Sequence(normalized))
1952}
1953
1954/// Recognize one clean wiki-link from a single **item** of a YAML flow sequence,
1955/// across the spellings an agent types for a list. After top-level flow parsing,
1956/// a list item arrives in one of:
1957///
1958/// - quoted — `"[[x]]"` ⇒ `String("[[x]]")`
1959/// - unquoted in a flow list — `[[x]]` inside `[…]` ⇒ `Seq[ Seq[String(x)] ]`
1960///   (one level deeper than a bare unquoted scalar, because the surrounding list
1961///   adds a wrapper); unwrap the single-element wrapper, then read the inline
1962///   `Seq[String(x)]` with [`unquoted_inline_link`].
1963///
1964/// Returns `None` for any item that is not exactly one clean wiki-link, so the
1965/// caller falls back to a scalar string and never fabricates a partial list.
1966fn link_from_flow_list_item(item: &Value) -> Option<WikiLink> {
1967    match item {
1968        Value::String(s) => parse_wiki_link_str(s),
1969        Value::Sequence(inner) => {
1970            // Unquoted list item `[[x]]` → `Seq[ Seq[String(x)] ]`: peel the lone
1971            // wrapper to expose the inline-link shape `Seq[String(x)]`.
1972            //
1973            // Only this triple-nested shape is a wiki-link. We deliberately do
1974            // NOT fall back to `unquoted_inline_link(item)` on the bare double
1975            // nesting `Seq[String(x)]` (a plain one-element string list `[x]`):
1976            // that fallback fabricated a wiki-link out of an ordinary nested
1977            // string list — `groups: [[alpha], [beta]]` (data `[["alpha"],
1978            // ["beta"]]`) was rewritten to `- '[[alpha]]'` / `- '[[beta]]'`,
1979            // silently changing the field's type and manufacturing short-form
1980            // links the tool then flags as `WIKI_LINK_SHORT_FORM`. An unknown
1981            // nested string list must pass through verbatim (SPEC § "Unknown
1982            // fields pass through").
1983            if inner.len() == 1 {
1984                if let Some(link) = unquoted_inline_link(&inner[0]) {
1985                    return Some(link);
1986                }
1987            }
1988            None
1989        }
1990        _ => None,
1991    }
1992}
1993
1994/// A target is a full store-relative path when its first path segment is one of
1995/// the three canonical layer dirs and at least one `/` separator follows. A
1996/// trailing `.md` does not affect this classification.
1997fn target_is_full_path(target: &str) -> bool {
1998    let target = target.trim();
1999    match target.split_once('/') {
2000        Some((head, _rest)) => LAYER_DIRS.contains(&head),
2001        None => false,
2002    }
2003}
2004
/// Whether the trimmed target ends in a literal `.md` (the condition validate
/// reports as the `WIKI_LINK_HAS_EXTENSION` warning). The comparison is
/// case-sensitive.
fn target_has_md_extension(target: &str) -> bool {
    target.trim().strip_suffix(".md").is_some()
}
2010
/// Forward-only cursor translating byte offsets within one line into 1-based
/// character (Unicode scalar) columns.
///
/// Each query counts characters only over the bytes between the previously
/// queried offset and the new one, so answering any number of queries on a line
/// walks that line at most once in total — unlike recounting
/// `line[..offset].chars().count()` from the start for every match, which is
/// quadratic on a link-dense line.
///
/// Contract: offsets are queried in non-decreasing order and lie on UTF-8
/// character boundaries (regex match starts always do).
struct ColCursor {
    /// Byte offset of the most recent query.
    byte: usize,
    /// Number of characters that precede `byte` in the line.
    chars: u32,
}

impl ColCursor {
    /// A cursor positioned at the start of a line.
    fn new() -> Self {
        Self { byte: 0, chars: 0 }
    }

    /// Returns the 1-based character column of `byte_offset` in `line` and
    /// advances the cursor to it. `byte_offset` must not be smaller than any
    /// earlier query (checked with a debug assertion).
    fn column_at(&mut self, line: &str, byte_offset: usize) -> u32 {
        debug_assert!(byte_offset >= self.byte, "ColCursor queried out of order");
        // Only the gap since the last query still needs counting.
        let gap = &line[self.byte..byte_offset];
        self.chars += gap.chars().count() as u32;
        self.byte = byte_offset;
        self.chars + 1
    }
}
2041
/// Position of the first comma-token at or after `from` that *opens a greedy
/// modifier clause* — a token that, trimmed and ASCII-lowercased, is exactly
/// `enum`, starts with `enum:`, or starts with `default ` — or `raw.len()` if
/// there is none.
///
/// Callers use the result as the exclusive end of a greedy `default`/`enum`
/// value, so the value neither stops at its first comma nor swallows the next
/// greedy clause.
fn next_greedy_clause(raw: &[&str], from: usize) -> usize {
    raw.iter()
        .skip(from)
        .position(|token| {
            let keyword = token.trim().to_ascii_lowercase();
            keyword == "enum" || keyword.starts_with("enum:") || keyword.starts_with("default ")
        })
        // `position` counts from `from`; shift back to an index into `raw`.
        .map_or(raw.len(), |offset| from + offset)
}
2058
2059/// Map a lowercase shape keyword to its [`Shape`].
2060fn shape_from_str(s: &str) -> Option<Shape> {
2061    match s {
2062        "string" => Some(Shape::String),
2063        "int" => Some(Shape::Int),
2064        "bool" => Some(Shape::Bool),
2065        "date" => Some(Shape::Date),
2066        "email" => Some(Shape::Email),
2067        "currency" => Some(Shape::Currency),
2068        "url" => Some(Shape::Url),
2069        _ => None,
2070    }
2071}
2072
/// ATX heading level of `line` — the length of its leading `#` run — or 0 when
/// the line is not a heading.
///
/// Follows CommonMark: at most three leading spaces, a run of one to six `#`,
/// and the run must be followed by a space, a tab, or the end of the line.
fn heading_level(line: &str) -> u8 {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return 0;
    }
    let after_run = unindented.trim_start_matches('#');
    let run = unindented.len() - after_run.len();
    let terminated = after_run.is_empty() || after_run.starts_with([' ', '\t']);
    if (1..=6).contains(&run) && terminated {
        run as u8
    } else {
        0
    }
}
2093
/// Text of an ATX heading: whatever follows the leading `#` run of length
/// `level`, trimmed, minus a CommonMark *closing* `#` sequence
/// (`## Title ##` → `Title`).
///
/// A trailing `#` run only counts as a closing sequence when it is preceded by
/// a space or tab, or when nothing else remains. A run that abuts the previous
/// word is literal text: `## C#` → `C#`, `## issue-123#` → `issue-123#`.
/// Stripping those unconditionally would corrupt heading text and — because
/// `parse_db_md` uses the heading verbatim as the schema type key — bind a
/// `### c#` schema to `type: c`.
fn heading_text(line: &str, level: u8) -> String {
    let text_start = (line.len() - line.trim_start_matches(' ').len()) + usize::from(level);
    let text = line[text_start..].trim();

    // Candidate result with every trailing `#` removed.
    let core = text.trim_end_matches('#');
    let had_trailing_run = core.len() != text.len();
    let is_closing_sequence =
        had_trailing_run && (core.is_empty() || core.ends_with([' ', '\t']));

    if is_closing_sequence {
        // Drop the run together with the whitespace in front of it.
        core.trim_end().to_string()
    } else {
        // Either no trailing `#` at all, or the run is part of the content.
        text.to_string()
    }
}
2126
/// Detects a fenced-code-block opener. Returns `(fence byte, run length)` when
/// `line` — indented by at most three spaces — starts with a run of three or
/// more backticks or tildes; `None` otherwise.
fn opening_fence(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }
    let fence = rest
        .bytes()
        .next()
        .filter(|first| matches!(first, b'`' | b'~'))?;
    let info = rest.trim_start_matches(char::from(fence));
    let run = rest.len() - info.len();
    // A backtick fence's info string may not itself contain a backtick.
    let backtick_in_info = fence == b'`' && info.contains('`');
    if run < 3 || backtick_in_info {
        None
    } else {
        Some((fence, run))
    }
}
2148
/// Whether `line` closes the open fence `fence = (byte, run length)`: indented
/// by at most three spaces, a run of the same fence character at least as long
/// as the opener, and nothing but whitespace afterwards.
fn is_closing_fence(line: &str, fence: (u8, usize)) -> bool {
    let (byte, open_len) = fence;
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return false;
    }
    let tail = rest.trim_start_matches(char::from(byte));
    let run = rest.len() - tail.len();
    run >= open_len && tail.trim().is_empty()
}
2164
/// Prose of a section: the text following the first line (the heading),
/// trimmed on both ends. Empty when the section consists of the heading alone.
fn section_prose(section_body: &str) -> String {
    section_body
        .split_once('\n')
        .map(|(_, after_heading)| after_heading.trim().to_string())
        .unwrap_or_default()
}
2172
/// Bullet lines of a section body. The first line (the heading) is skipped;
/// every remaining line is trimmed on both ends and kept only if it then starts
/// with a `- `, `* `, or `+ ` marker.
fn bullet_lines(section_body: &str) -> Vec<String> {
    const MARKERS: [&str; 3] = ["- ", "* ", "+ "];

    let mut bullets = Vec::new();
    // `skip(1)` drops the heading line.
    for line in section_body.lines().skip(1) {
        let line = line.trim();
        if MARKERS.iter().any(|marker| line.starts_with(*marker)) {
            bullets.push(line.to_string());
        }
    }
    bullets
}
2184
/// Returns the meaningful prefix of a bullet's content: everything before the
/// earliest comment separator, trimmed.
///
/// Four spellings of the separator are recognized — em-dash (` — `), double
/// hyphen (` -- `), en-dash (` – `), and a single ASCII hyphen (` - `). The
/// single-hyphen form matters: without it a line such as
/// `records/decisions/q3.md - finalized` would keep its comment as part of the
/// frozen path, never match, and leave the freeze failing OPEN. Store-relative
/// paths are `/`-joined and spaceless, so ` - ` never occurs inside one.
fn strip_bullet_comment(content: &str) -> &str {
    let cut = [" — ", " -- ", " – ", " - "]
        .iter()
        .filter_map(|separator| content.find(*separator))
        .min()
        .unwrap_or(content.len());
    content[..cut].trim()
}
2202
/// Content of a bullet line: the line trimmed, with one leading `- `, `* `, or
/// `+ ` marker removed if present, then trimmed again. A line without a marker
/// is returned trimmed but otherwise unchanged.
fn bullet_content(bullet: &str) -> &str {
    let line = bullet.trim();
    ["- ", "* ", "+ "]
        .iter()
        .find_map(|marker| line.strip_prefix(*marker))
        .unwrap_or(line)
        .trim()
}
2212
2213/// Extract a store-relative path from a Frozen-pages bullet. The path may be
2214/// wrapped in backticks and followed by an em-dash comment.
2215fn extract_path_bullet(bullet: &str) -> String {
2216    let content = bullet_content(bullet);
2217    // Prefer a backtick-delimited span if present.
2218    if let Some(start) = content.find('`') {
2219        if let Some(end_rel) = content[start + 1..].find('`') {
2220            return content[start + 1..start + 1 + end_rel].trim().to_string();
2221        }
2222    }
2223    // Otherwise take the text up to a comment separator, stripping quotes.
2224    strip_bullet_comment(content)
2225        .trim_matches('"')
2226        .trim_matches('\'')
2227        .trim()
2228        .to_string()
2229}
2230
2231/// Extract a comma-separated type list from an Ignored-types bullet, stripping
2232/// backticks/quotes and any trailing em-dash comment.
2233fn extract_type_list_bullet(bullet: &str) -> Vec<String> {
2234    let content = strip_bullet_comment(bullet_content(bullet));
2235    content
2236        .split(',')
2237        .map(|t| {
2238            t.trim()
2239                .trim_matches('`')
2240                .trim_matches('"')
2241                .trim_matches('\'')
2242                .trim()
2243                .to_string()
2244        })
2245        .filter(|t| !t.is_empty())
2246        .collect()
2247}
2248
2249#[cfg(test)]
2250mod tests {
2251    use super::*;
2252    use std::path::Path;
2253    use tempfile::tempdir;
2254
2255    // ── Config::frozen_match (the single write-surface policy matcher) ───────
2256
2257    #[test]
2258    fn frozen_match_is_md_insensitive_both_directions() {
2259        // A policy entry stored WITHOUT `.md` (the natural extensionless
2260        // spelling `parse_db_md` keeps verbatim) must still match a `.md`
2261        // write target — the regression every write surface had.
2262        let cfg = Config {
2263            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
2264            ..Config::default()
2265        };
2266        assert_eq!(
2267            cfg.frozen_match(Path::new("records/decisions/q1.md")),
2268            Some(PathBuf::from("records/decisions/q1")),
2269            "extensionless policy entry must freeze the .md file"
2270        );
2271        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
2272
2273        // The symmetric case: a policy entry WITH `.md` matches a bare target.
2274        let cfg = Config {
2275            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
2276            ..Config::default()
2277        };
2278        assert_eq!(
2279            cfg.frozen_match(Path::new("records/decisions/q1")),
2280            Some(PathBuf::from("records/decisions/q1.md")),
2281        );
2282        // And the same-spelling cases still match.
2283        assert!(cfg.is_frozen(Path::new("records/decisions/q1.md")));
2284    }
2285
2286    #[test]
2287    fn frozen_match_drops_leading_dot_slash() {
2288        let cfg = Config {
2289            frozen_pages: vec![PathBuf::from("records/decisions/q1.md")],
2290            ..Config::default()
2291        };
2292        assert!(cfg.is_frozen(Path::new("./records/decisions/q1.md")));
2293        assert!(cfg.is_frozen(Path::new("./records/decisions/q1")));
2294    }
2295
2296    #[test]
2297    fn frozen_match_returns_none_for_unlisted_and_prefix_paths() {
2298        let cfg = Config {
2299            frozen_pages: vec![PathBuf::from("records/decisions/q1")],
2300            ..Config::default()
2301        };
2302        assert!(cfg
2303            .frozen_match(Path::new("records/decisions/q2.md"))
2304            .is_none());
2305        // A prefix is not a match: `q1` must not freeze `q1-draft`.
2306        assert!(cfg
2307            .frozen_match(Path::new("records/decisions/q1-draft.md"))
2308            .is_none());
2309        assert!(!cfg.is_frozen(Path::new("records/decisions/q11.md")));
2310    }
2311
2312    // ── split_frontmatter ───────────────────────────────────────────────────
2313
2314    #[test]
2315    fn split_frontmatter_separates_yaml_and_verbatim_body() {
2316        let text = "---\ntype: contact\nsummary: x\n---\n# Heading\n\nBody line.\n";
2317        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
2318        assert_eq!(p.frontmatter_yaml, "type: contact\nsummary: x\n");
2319        // Body is everything after the closing fence's newline, byte-for-byte.
2320        assert_eq!(p.body, "# Heading\n\nBody line.\n");
2321    }
2322
2323    #[test]
2324    fn split_frontmatter_preserves_body_without_trailing_newline() {
2325        let text = "---\ntype: x\n---\nno trailing newline";
2326        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
2327        assert_eq!(p.body, "no trailing newline");
2328    }
2329
2330    #[test]
2331    fn split_frontmatter_empty_body_when_nothing_after_fence() {
2332        let text = "---\ntype: x\n---\n";
2333        let p = split_frontmatter(text, Path::new("f.md")).unwrap();
2334        assert_eq!(p.body, "");
2335    }
2336
2337    #[test]
2338    fn split_frontmatter_missing_opening_fence_errors() {
2339        let text = "# No frontmatter here\ntype: x\n";
2340        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
2341        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
2342    }
2343
2344    #[test]
2345    fn split_frontmatter_leading_content_before_fence_rejected() {
2346        // The opening fence must be the very first line; a blank line first is
2347        // not allowed.
2348        let text = "\n---\ntype: x\n---\nbody";
2349        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
2350        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
2351    }
2352
2353    #[test]
2354    fn split_frontmatter_unterminated_block_errors() {
2355        let text = "---\ntype: x\nsummary: y\n";
2356        let err = split_frontmatter(text, Path::new("f.md")).unwrap_err();
2357        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
2358    }
2359
2360    // ── Frontmatter::parse ───────────────────────────────────────────────────
2361
2362    #[test]
2363    fn parse_populates_typed_fields_and_routes_unknowns_to_extra() {
2364        let yaml = "type: contact\nid: sarah-chen\nsummary: Director of Ops\nstatus: active\ntags: [vip, renewal]\nemail: sarah@northstar.io\nrole: Director";
2365        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
2366        assert_eq!(fm.type_.as_deref(), Some("contact"));
2367        assert_eq!(fm.id.as_deref(), Some("sarah-chen"));
2368        assert_eq!(fm.summary.as_deref(), Some("Director of Ops"));
2369        assert_eq!(fm.status.as_deref(), Some("active"));
2370        assert_eq!(fm.tags, vec!["vip".to_string(), "renewal".to_string()]);
2371        // Type-specific fields are NOT promoted to typed slots.
2372        assert!(fm.type_.is_some() && !fm.extra.contains_key("type"));
2373        assert!(!fm.extra.contains_key("tags"));
2374        assert_eq!(
2375            fm.extra.get("email").and_then(|v| v.as_str()),
2376            Some("sarah@northstar.io")
2377        );
2378        assert_eq!(
2379            fm.extra.get("role").and_then(|v| v.as_str()),
2380            Some("Director")
2381        );
2382    }
2383
2384    #[test]
2385    fn parse_reads_rfc3339_timestamps() {
2386        let yaml =
2387            "type: email\ncreated: 2026-05-27T08:00:00-07:00\nupdated: 2026-05-28T09:30:00-07:00";
2388        let fm = Frontmatter::parse(yaml, Path::new("f.md")).unwrap();
2389        let created = fm.created.expect("created parsed");
2390        // -07:00 offset is 7 * 3600 seconds west.
2391        assert_eq!(created.offset().utc_minus_local(), 7 * 3600);
2392        assert_eq!(created.to_rfc3339(), "2026-05-27T08:00:00-07:00");
2393        assert!(fm.updated.is_some());
2394    }
2395
2396    #[test]
2397    fn parse_rejects_non_rfc3339_timestamp() {
2398        // A date-only value is not a full RFC3339 timestamp; created/updated
2399        // require the full form.
2400        let yaml = "type: email\ncreated: 2026-05-27";
2401        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
2402        match err {
2403            ParseError::BadTimestamp { key, value, .. } => {
2404                assert_eq!(key, "created");
2405                assert_eq!(value, "2026-05-27");
2406            }
2407            other => panic!("expected BadTimestamp, got {other:?}"),
2408        }
2409    }
2410
2411    #[test]
2412    fn parse_malformed_yaml_errors() {
2413        // Unclosed flow mapping is invalid YAML.
2414        let yaml = "type: contact\n  bad: : :\n- nope";
2415        let err = Frontmatter::parse(yaml, Path::new("bad.md")).unwrap_err();
2416        assert!(matches!(err, ParseError::MalformedYaml { .. }));
2417    }
2418
2419    #[test]
2420    fn frontmatter_with_yaml_tag_on_mapping_does_not_panic() {
2421        // Regression: a YAML tag on the top-level mapping made the old
2422        // `expect_err` path PANIC, because a tagged mapping deserializes to a
2423        // `Mapping` just fine. It must now be handled — accepted as the inner
2424        // mapping, never a panic.
2425        let fm = Frontmatter::parse("!mytag\ntype: contact\nsummary: hi\n", Path::new("x.md"))
2426            .expect("tagged-mapping frontmatter must parse, not panic");
2427        assert_eq!(fm.type_.as_deref(), Some("contact"));
2428        // A genuine scalar/sequence top level is still malformed (and still
2429        // doesn't panic).
2430        assert!(Frontmatter::parse("- a\n- b\n", Path::new("x.md")).is_err());
2431    }
2432
2433    #[test]
2434    fn parse_empty_block_is_empty_frontmatter() {
2435        let fm = Frontmatter::parse("", Path::new("f.md")).unwrap();
2436        assert_eq!(fm, Frontmatter::default());
2437    }
2438
2439    #[test]
2440    fn parse_scalar_top_level_is_malformed() {
2441        // A bare scalar at the top level is not a frontmatter mapping.
2442        let err = Frontmatter::parse("just a string", Path::new("f.md")).unwrap_err();
2443        assert!(matches!(err, ParseError::MalformedYaml { .. }));
2444    }
2445
2446    // ── to_yaml canonical order ──────────────────────────────────────────────
2447
    #[test]
    fn to_yaml_emits_canonical_key_order() {
        // Checks that `to_yaml` emits keys in the canonical order — universal
        // head, then type-specific fields sorted, then universal tail — and that
        // timestamps survive a re-parse.
        let mut fm = Frontmatter {
            type_: Some("contact".into()),
            id: Some("sarah-chen".into()),
            summary: Some("Director of Ops".into()),
            status: Some("active".into()),
            tags: vec!["vip".into()],
            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
            updated: Some(DateTime::parse_from_rfc3339("2026-05-28T09:30:00-07:00").unwrap()),
            ..Default::default()
        };
        // Two type-specific fields, inserted in NON-alphabetical order to prove
        // the writer sorts them (BTreeMap) between the universal head and tail.
        fm.extra
            .insert("role".into(), Value::String("Director".into()));
        fm.extra.insert(
            "company".into(),
            Value::String("[[records/companies/northstar]]".into()),
        );

        let yaml = fm.to_yaml();
        // Collect top-level keys only: skip sequence items (`-`) and indented
        // continuation lines, then take the text before the first `:`.
        let keys: Vec<&str> = yaml
            .lines()
            .filter(|l| !l.starts_with(['-', ' ']) && l.contains(':'))
            .map(|l| l.split(':').next().unwrap())
            .collect();
        assert_eq!(
            keys,
            vec![
                "type", "id", "created", "updated", "summary", // universal head
                "company", "role",   // type-specific, sorted
                "status", // universal tail
                "tags",
            ],
            "canonical order violated; got:\n{yaml}"
        );
        // Timestamps round-trip as RFC3339 strings (YAML may quote them).
        assert!(
            yaml.contains("2026-05-27T08:00:00-07:00"),
            "created timestamp missing; got:\n{yaml}"
        );
        // The value re-parses to the same instant regardless of quoting.
        let reparsed = Frontmatter::parse(&yaml, Path::new("rt.md")).unwrap();
        assert_eq!(reparsed.created, fm.created);
        assert_eq!(reparsed.updated, fm.updated);
    }
2495
2496    #[test]
2497    fn to_yaml_omits_absent_optional_fields() {
2498        let fm = Frontmatter {
2499            type_: Some("note".into()),
2500            ..Default::default()
2501        };
2502        let yaml = fm.to_yaml();
2503        assert!(yaml.contains("type: note"));
2504        assert!(!yaml.contains("status"));
2505        assert!(!yaml.contains("tags"));
2506        assert!(!yaml.contains("summary"));
2507    }
2508
2509    // ── Regression: non-string scalar universal fields round-trip (finding #1) ─
2510
2511    #[test]
2512    fn regression_parse_preserves_non_string_scalar_universal_fields() {
2513        // A hand/externally-authored file whose universal fields are bare
2514        // scalars YAML reads as Number/Bool — `id: 100`, `summary: 2026`,
2515        // `status: 0`, `type: 42` — must be PRESERVED as their string form, not
2516        // read as None. Before the fix, `v.as_str()` returned None for these and
2517        // the matched arm discarded the value entirely (never reaching `extra`).
2518        let yaml = "type: 42\nid: 100\nsummary: 2026\nstatus: 0";
2519        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
2520        assert_eq!(fm.type_.as_deref(), Some("42"), "type scalar dropped");
2521        assert_eq!(fm.id.as_deref(), Some("100"), "id scalar dropped");
2522        assert_eq!(
2523            fm.summary.as_deref(),
2524            Some("2026"),
2525            "summary scalar dropped"
2526        );
2527        assert_eq!(fm.status.as_deref(), Some("0"), "status scalar dropped");
2528        // The values must surface through the public `get` accessor too.
2529        assert_eq!(
2530            fm.get("summary")
2531                .and_then(|v| v.as_str().map(str::to_string)),
2532            Some("2026".to_string())
2533        );
2534    }
2535
    #[test]
    fn regression_format_round_trip_does_not_delete_numeric_frontmatter() {
        // The exact finding-#1 trigger: `dbmd format` is read_file -> write_file.
        // A file whose `id`/`summary`/`status` are bare numeric scalars must
        // still carry those fields after the canonical re-emit. Before the fix,
        // the lines were silently deleted from disk (only `type` survived).
        //
        // Steps: write the fixture to a temp dir, round-trip it through
        // `read_file`/`write_file`, then re-parse what is on disk.
        let dir = tempdir().unwrap();
        let path = dir.path().join("x.md");
        let original = "---\ntype: contact\nid: 100\nsummary: 2026\nstatus: 0\n---\nbody\n";
        std::fs::write(&path, original).unwrap();

        // Re-emit through the canonical writer, exactly as `dbmd format` does.
        let (fm, body) = read_file(&path).unwrap();
        write_file(&path, &fm, &body).unwrap();

        // Inspect the bytes actually written, not the in-memory `fm`.
        let after = std::fs::read_to_string(&path).unwrap();
        // None of the four fields may vanish; they survive as string scalars.
        let reparsed = Frontmatter::parse(
            &split_frontmatter(&after, &path).unwrap().frontmatter_yaml,
            &path,
        )
        .unwrap();
        assert_eq!(reparsed.type_.as_deref(), Some("contact"));
        assert_eq!(reparsed.id.as_deref(), Some("100"), "id deleted by format");
        assert_eq!(
            reparsed.summary.as_deref(),
            Some("2026"),
            "summary deleted by format"
        );
        assert_eq!(
            reparsed.status.as_deref(),
            Some("0"),
            "status deleted by format"
        );
        // The body is preserved verbatim.
        assert_eq!(body, "body\n");
    }
2573
    #[test]
    fn regression_format_round_trip_preserves_oversized_integer_frontmatter() {
        // Adversarial review #6: a bare integer literal beyond i64/u64 range must
        // survive `dbmd format` (read_file -> write_file) byte-for-byte. Before
        // the fix, serde_norway silently truncated `> u128::MAX` to f64 (`999…9`
        // -> `1e39`) and hard-rejected `(u64::MAX, u128::MAX]` — corrupting an
        // imported numeric ID and breaking the unknown-field round-trip contract.
        let dir = tempdir().unwrap();
        let path = dir.path().join("x.md");
        // One literal for each of the two failure ranges described above.
        let big = "999999999999999999999999999999999999999"; // 39 digits, > u128::MAX
        let mid = "99999999999999999999"; // 20 digits, in (u64::MAX, u128::MAX]
        let original = format!(
            "---\ntype: contact\nsummary: x\naccount_number: {big}\nid_num: {mid}\n---\nbody\n"
        );
        std::fs::write(&path, &original).unwrap();

        // Two round-trips: the value must survive verbatim AND be idempotent.
        for _ in 0..2 {
            let (fm, body) = read_file(&path).expect("oversized-int frontmatter must parse");
            write_file(&path, &fm, &body).unwrap();
            // Check the on-disk text after each pass.
            let after = std::fs::read_to_string(&path).unwrap();
            assert!(
                after.contains(big),
                "39-digit integer corrupted by format:\n{after}"
            );
            assert!(
                after.contains(mid),
                "20-digit integer corrupted by format:\n{after}"
            );
            // Guards against the float truncation named in the header comment.
            assert!(
                !after.to_lowercase().contains("1e39"),
                "integer was truncated to a float:\n{after}"
            );
            assert_eq!(body, "body\n", "body must be preserved verbatim");
        }
    }
2610
2611    #[test]
2612    fn oversized_int_literal_detection_is_precise() {
2613        // In range (serde_norway handles losslessly) → never quoted.
2614        for ok in [
2615            "0",
2616            "42",
2617            "-17",
2618            "9223372036854775807",
2619            "18446744073709551615",
2620            "12.5",
2621            "007",
2622            "abc",
2623            "",
2624        ] {
2625            assert!(
2626                !is_oversized_int_literal(ok),
2627                "must NOT be flagged oversized: {ok:?}"
2628            );
2629        }
2630        // Beyond i64/u64 → quoted to preserve the literal.
2631        for big in [
2632            "18446744073709551616",                    // u64::MAX + 1
2633            "99999999999999999999",                    // 20 digits
2634            "999999999999999999999999999999999999999", // 39 digits
2635            "-9999999999999999999999",                 // very negative
2636        ] {
2637            assert!(
2638                is_oversized_int_literal(big),
2639                "must be flagged oversized: {big:?}"
2640            );
2641        }
2642    }
2643
2644    // ── Regression: BOM-prefixed files parse like store/index (finding #19) ────
2645
2646    #[test]
2647    fn regression_split_frontmatter_tolerates_leading_utf8_bom() {
2648        // A BOM-prefixed file (EF BB BF + `---\n...`) is walked and indexed by
2649        // `dbmd index` (store/index strip the BOM) but, before the fix, every
2650        // write/edit surface routed through `read_file` hard-failed with
2651        // MissingFrontmatter. `split_frontmatter` must now strip a single leading
2652        // U+FEFF and emit a BOM-free body.
2653        let text = "\u{feff}---\ntype: note\nsummary: x\n---\nbody\n";
2654        let parsed = split_frontmatter(text, Path::new("note.md")).unwrap();
2655        assert_eq!(parsed.frontmatter_yaml, "type: note\nsummary: x\n");
2656        // Body never carries the BOM forward into the canonical writer.
2657        assert_eq!(parsed.body, "body\n");
2658        assert!(!parsed.body.starts_with('\u{feff}'));
2659    }
2660
2661    #[test]
2662    fn regression_read_file_parses_bom_prefixed_file() {
2663        // End-to-end through the same `read_file` path `dbmd fm get/set`,
2664        // `format`, `link`, and `write` use. Before the fix this returned
2665        // Err(MissingFrontmatter) on a file the catalog had already indexed.
2666        let dir = tempdir().unwrap();
2667        let path = dir.path().join("note.md");
2668        std::fs::write(&path, "\u{feff}---\ntype: note\nsummary: x\n---\nbody\n").unwrap();
2669
2670        let (fm, body) = read_file(&path).expect("BOM-prefixed file must parse");
2671        assert_eq!(fm.type_.as_deref(), Some("note"));
2672        assert_eq!(fm.summary.as_deref(), Some("x"));
2673        assert_eq!(body, "body\n");
2674    }
2675
    #[test]
    fn to_yaml_preserves_unquoted_scalar_wiki_link_round_trip() {
        // Regression (PRIMARY): the SPEC-canonical scalar wiki-link is the
        // *unquoted* inline `company: [[records/companies/northstar]]`
        // (SPEC § Linking, the worked `contact` example). YAML parses it to the
        // nested `Seq[Seq[String]]` shape and `parse` stores that verbatim in
        // `extra`. Before the fix, `to_yaml` re-emitted it block-style as
        //     company:
        //     - - records/companies/northstar
        // — the `[[ ]]` brackets GONE — so a no-op re-emit (`dbmd format`, and
        // any `fm set` / `link` write) silently destroyed the link.
        //
        // Three stages are checked: the first emit, a re-parse of that output,
        // and a second emit that must equal the first.
        let yaml = "type: contact\ncompany: [[records/companies/northstar]]";
        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
        // Sanity: it really parsed as the nested sequence, not a string.
        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());

        let out = fm.to_yaml();
        // The link must survive as a quoted inline scalar — brackets intact, and
        // never the bracket-less block sequence `- - records/...`.
        assert!(
            out.contains("[[records/companies/northstar]]"),
            "canonical writer dropped the wiki-link brackets; got:\n{out}"
        );
        assert!(
            !out.contains("- - "),
            "canonical writer emitted a nested block sequence (link corrupted); got:\n{out}"
        );

        // And it round-trips: re-parsing the emitted YAML still surfaces exactly
        // one link with the right target (the edge graph/backlinks rely on).
        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
        let fields = reparsed.link_fields();
        // Flatten each (key, link) pair into plain borrowed strings for comparison.
        let links: Vec<(&str, &str, Option<&str>)> = fields
            .iter()
            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
            .collect();
        assert_eq!(
            links,
            vec![("company", "records/companies/northstar", None)]
        );

        // A second re-emit is a fixed point — no progressive corruption across
        // repeated curator-loop writes.
        assert_eq!(
            reparsed.to_yaml(),
            out,
            "to_yaml is not idempotent on links"
        );
    }
2725
2726    #[test]
2727    fn to_yaml_preserves_unquoted_scalar_link_with_display() {
2728        // The `|display` segment must survive the unquoted-inline round-trip too.
2729        let yaml = "type: contact\ncompany: [[records/companies/northstar|Northstar]]";
2730        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
2731        let out = fm.to_yaml();
2732        assert!(
2733            out.contains("[[records/companies/northstar|Northstar]]"),
2734            "display segment lost on round-trip; got:\n{out}"
2735        );
2736        let reparsed = Frontmatter::parse(&out, Path::new("c.md")).unwrap();
2737        let f = reparsed.link_fields();
2738        assert_eq!(f.len(), 1);
2739        assert_eq!(f[0].1.target, "records/companies/northstar");
2740        assert_eq!(f[0].1.display.as_deref(), Some("Northstar"));
2741    }
2742
2743    #[test]
2744    fn to_yaml_does_not_mangle_link_list_or_plain_nested_sequence() {
2745        // A genuine quoted block list of links round-trips as a clean string
2746        // list — never collapsed to a scalar — and a plain nested sequence that
2747        // is NOT a wiki-link is left exactly as written (no false conversion).
2748        let yaml = "type: meeting\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nmatrix:\n  - - 1\n    - 2";
2749        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
2750        let out = fm.to_yaml();
2751
2752        // Both attendee links survive as quoted strings.
2753        assert!(out.contains("[[records/contacts/elena]]"), "got:\n{out}");
2754        assert!(out.contains("[[records/contacts/sarah]]"), "got:\n{out}");
2755
2756        let reparsed = Frontmatter::parse(&out, Path::new("m.md")).unwrap();
2757        let fields = reparsed.link_fields();
2758        let attendees: Vec<&str> = fields
2759            .iter()
2760            .filter(|(k, _)| k == "attendees")
2761            .map(|(_, l)| l.target.as_str())
2762            .collect();
2763        assert_eq!(
2764            attendees,
2765            vec!["records/contacts/elena", "records/contacts/sarah"]
2766        );
2767        // The non-link nested sequence is preserved verbatim, not touched.
2768        assert_eq!(reparsed.extra.get("matrix"), fm.extra.get("matrix"));
2769    }
2770
2771    // ── read_file / write_file round-trip ────────────────────────────────────
2772
2773    #[test]
2774    fn write_then_read_roundtrips_and_preserves_body_verbatim() {
2775        let dir = tempdir().unwrap();
2776        let path = dir.path().join("sources/emails/x.md");
2777        let body = "# Subject\n\nHello,\n\nSee [[records/contacts/sarah-chen]].\n";
2778        let mut fm = Frontmatter {
2779            type_: Some("email".into()),
2780            summary: Some("renewal note".into()),
2781            created: Some(DateTime::parse_from_rfc3339("2026-05-27T08:00:00-07:00").unwrap()),
2782            ..Default::default()
2783        };
2784        fm.extra
2785            .insert("from".into(), Value::String("elena@northstar.io".into()));
2786
2787        write_file(&path, &fm, body).unwrap();
2788
2789        let (read_fm, read_body) = read_file(&path).unwrap();
2790        assert_eq!(read_body, body, "body must be preserved byte-for-byte");
2791        assert_eq!(read_fm.type_.as_deref(), Some("email"));
2792        assert_eq!(read_fm.summary.as_deref(), Some("renewal note"));
2793        assert_eq!(
2794            read_fm.extra.get("from").and_then(|v| v.as_str()),
2795            Some("elena@northstar.io")
2796        );
2797        // The on-disk file starts with a fence and ends with the verbatim body.
2798        let raw = std::fs::read_to_string(&path).unwrap();
2799        assert!(raw.starts_with("---\n"));
2800        assert!(raw.ends_with(body));
2801    }
2802
2803    #[test]
2804    fn roundtrip_modify_summary_then_write_changes_only_summary() {
2805        let dir = tempdir().unwrap();
2806        let path = dir.path().join("records/contacts/sarah.md");
2807        let body = "Long-form operator notes about Sarah.\n";
2808        let fm = Frontmatter {
2809            type_: Some("contact".into()),
2810            summary: Some("old summary".into()),
2811            ..Default::default()
2812        };
2813        write_file(&path, &fm, body).unwrap();
2814
2815        // Read → modify summary → write back.
2816        let (mut fm2, body2) = read_file(&path).unwrap();
2817        fm2.summary = Some("new summary".into());
2818        write_file(&path, &fm2, &body2).unwrap();
2819
2820        let (fm3, body3) = read_file(&path).unwrap();
2821        assert_eq!(fm3.summary.as_deref(), Some("new summary"));
2822        assert_eq!(fm3.type_.as_deref(), Some("contact"));
2823        assert_eq!(body3, body, "body unchanged across the round-trip");
2824    }
2825
2826    #[test]
2827    fn roundtrip_preserves_handwritten_unquoted_scalar_wiki_link_on_disk() {
2828        // End-to-end analog of `dbmd format` on the verbatim SPEC worked example:
2829        // a hand-written file carrying the canonical UNQUOTED scalar link
2830        // `company: [[records/companies/northstar]]`, read from disk then written
2831        // back unchanged. Before the fix this no-op re-emit rewrote the on-disk
2832        // value to the bracket-less block sequence `company:\n- - records/...`,
2833        // and every reader (validate/graph/backlinks) then lost the edge.
2834        let dir = tempdir().unwrap();
2835        let path = dir.path().join("records/contacts/sarah-chen.md");
2836        let file = "---\ntype: contact\nid: sarah-chen\nsummary: Director of Ops\ncompany: [[records/companies/northstar]]\n---\n# Sarah Chen\n\nNotes.\n";
2837        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2838        std::fs::write(&path, file).unwrap();
2839
2840        // Read → write back unchanged (the canonical no-op re-emit).
2841        let (fm, body) = read_file(&path).unwrap();
2842        write_file(&path, &fm, &body).unwrap();
2843
2844        // On-disk bytes still carry the bracketed link, never `- - records/...`.
2845        let raw = std::fs::read_to_string(&path).unwrap();
2846        assert!(
2847            raw.contains("[[records/companies/northstar]]"),
2848            "on-disk wiki-link brackets were destroyed; got:\n{raw}"
2849        );
2850        assert!(
2851            !raw.contains("- - "),
2852            "on-disk value became a nested block sequence; got:\n{raw}"
2853        );
2854
2855        // And the edge is still readable after the round-trip.
2856        let (fm2, _) = read_file(&path).unwrap();
2857        let fields = fm2.link_fields();
2858        let links: Vec<(&str, &str)> = fields
2859            .iter()
2860            .map(|(k, l)| (k.as_str(), l.target.as_str()))
2861            .collect();
2862        assert_eq!(links, vec![("company", "records/companies/northstar")]);
2863    }
2864
2865    #[test]
2866    fn write_file_does_not_leave_temp_files_behind() {
2867        let dir = tempdir().unwrap();
2868        let path = dir.path().join("records/x.md");
2869        let fm = Frontmatter {
2870            type_: Some("note".into()),
2871            ..Default::default()
2872        };
2873        write_file(&path, &fm, "body\n").unwrap();
2874        // The directory should contain only the target file, no `.x.md.tmp.*`.
2875        let entries: Vec<String> = std::fs::read_dir(path.parent().unwrap())
2876            .unwrap()
2877            .map(|e| e.unwrap().file_name().to_string_lossy().into_owned())
2878            .collect();
2879        assert_eq!(entries, vec!["x.md".to_string()]);
2880    }
2881
2882    // ── is_content_file ──────────────────────────────────────────────────────
2883
2884    #[test]
2885    fn is_content_file_recognizes_layers_and_excludes_meta() {
2886        assert!(Frontmatter::is_content_file(Path::new(
2887            "sources/emails/2026-05-22.md"
2888        )));
2889        assert!(Frontmatter::is_content_file(Path::new(
2890            "records/contacts/sarah-chen.md"
2891        )));
2892        // A synthesis profile the agent authored lives under `records/` (the
2893        // old `wiki/` layer is gone, so a `wiki/...` path is NOT content).
2894        assert!(Frontmatter::is_content_file(Path::new(
2895            "records/profiles/sarah-chen.md"
2896        )));
2897        assert!(!Frontmatter::is_content_file(Path::new(
2898            "wiki/people/sarah-chen.md"
2899        )));
2900        // Absolute paths under a layer are still content.
2901        assert!(Frontmatter::is_content_file(Path::new(
2902            "/home/db/records/companies/northstar.md"
2903        )));
2904        // index.md at any level is meta.
2905        assert!(!Frontmatter::is_content_file(Path::new(
2906            "records/contacts/index.md"
2907        )));
2908        assert!(!Frontmatter::is_content_file(Path::new("index.md")));
2909        // Root meta files.
2910        assert!(!Frontmatter::is_content_file(Path::new("DB.md")));
2911        assert!(!Frontmatter::is_content_file(Path::new("log.md")));
2912    }
2913
2914    // ── effective_id ─────────────────────────────────────────────────────────
2915
2916    #[test]
2917    fn effective_id_prefers_explicit_then_derives_from_path() {
2918        let with_id = Frontmatter {
2919            id: Some("explicit-id".into()),
2920            ..Default::default()
2921        };
2922        assert_eq!(
2923            with_id.effective_id(Path::new("records/profiles/sarah-chen.md")),
2924            "explicit-id"
2925        );
2926        let no_id = Frontmatter::default();
2927        assert_eq!(
2928            no_id.effective_id(Path::new("records/profiles/sarah-chen.md")),
2929            "sarah-chen"
2930        );
2931    }
2932
2933    // ── get / set ────────────────────────────────────────────────────────────
2934
2935    #[test]
2936    fn set_routes_universal_and_custom_keys() {
2937        let mut fm = Frontmatter::default();
2938        fm.set("type", "contact").unwrap();
2939        fm.set("summary", "hi").unwrap();
2940        fm.set("company", "[[records/companies/northstar]]")
2941            .unwrap();
2942        assert_eq!(fm.type_.as_deref(), Some("contact"));
2943        assert_eq!(fm.summary.as_deref(), Some("hi"));
2944        // Custom key landed in extra, not a typed slot.
2945        assert_eq!(
2946            fm.extra.get("company").and_then(|v| v.as_str()),
2947            Some("[[records/companies/northstar]]")
2948        );
2949        // get reads from both typed fields and extra.
2950        assert_eq!(
2951            fm.get("type").and_then(|v| v.as_str().map(String::from)),
2952            Some("contact".into())
2953        );
2954        assert_eq!(
2955            fm.get("company").and_then(|v| v.as_str().map(String::from)),
2956            Some("[[records/companies/northstar]]".into())
2957        );
2958        assert!(fm.get("nonexistent").is_none());
2959    }
2960
2961    #[test]
2962    fn set_timestamp_validates_rfc3339() {
2963        let mut fm = Frontmatter::default();
2964        fm.set("created", "2026-05-27T08:00:00-07:00").unwrap();
2965        assert!(fm.created.is_some());
2966        let err = fm.set("updated", "not-a-date").unwrap_err();
2967        assert!(matches!(err, ParseError::BadTimestamp { .. }));
2968    }
2969
2970    // ── extract_wiki_links ───────────────────────────────────────────────────
2971
2972    #[test]
2973    fn extract_wiki_links_flags_full_path_short_form_and_extension() {
2974        let body = "See [[records/contacts/sarah-chen]] and [[sarah-chen]].\nAlso [[records/profiles/sarah-chen.md|Sarah]].\n";
2975        let links = extract_wiki_links(body, Path::new("doc.md"));
2976        assert_eq!(links.len(), 3);
2977
2978        // Full path, no extension, no display.
2979        assert_eq!(links[0].target, "records/contacts/sarah-chen");
2980        assert!(links[0].is_full_path);
2981        assert!(!links[0].has_md_extension);
2982        assert_eq!(links[0].display, None);
2983        assert_eq!(links[0].location.1, 1, "first link on line 1");
2984
2985        // Short form: not a full path.
2986        assert_eq!(links[1].target, "sarah-chen");
2987        assert!(!links[1].is_full_path, "bare target is short-form");
2988
2989        // Full path WITH .md extension and a display override on line 2.
2990        assert_eq!(links[2].target, "records/profiles/sarah-chen.md");
2991        assert!(links[2].is_full_path);
2992        assert!(links[2].has_md_extension);
2993        assert_eq!(links[2].display.as_deref(), Some("Sarah"));
2994        assert_eq!(links[2].location.1, 2);
2995    }
2996
2997    #[test]
2998    fn extract_wiki_links_reports_1_based_column_counting_chars() {
2999        // A multi-byte prefix (é is 2 bytes) must not skew the char column.
3000        let body = "café [[records/x/y]]";
3001        let links = extract_wiki_links(body, Path::new("d.md"));
3002        assert_eq!(links.len(), 1);
3003        // "café " is 5 chars, so the `[[` starts at char column 6 (1-based).
3004        assert_eq!(links[0].location.2, 6);
3005    }
3006
3007    #[test]
3008    fn extract_wiki_links_columns_are_correct_for_multiple_links_on_one_line() {
3009        // Locks the single-pass column cursor (the O(n²)→O(n) fix): each `[[`
3010        // reports the right 1-based CHAR column even with multi-byte prefixes and
3011        // several links per line.
3012        let body = "café [[a]] · [[records/x/y]] end";
3013        let links = extract_wiki_links(body, Path::new("d.md"));
3014        assert_eq!(links.len(), 2);
3015        // "café " = 5 chars → first `[[` at col 6.
3016        assert_eq!(links[0].location.2, 6);
3017        // "café [[a]] · " = 5 + 5 (`[[a]]`) + 3 (` · `, `·` is 1 char) = 13 chars
3018        // → second `[[` at col 14.
3019        assert_eq!(links[1].location.2, 14);
3020    }
3021
3022    #[test]
3023    fn extract_wiki_links_ignores_a_lone_path_without_brackets() {
3024        let links = extract_wiki_links(
3025            "records/contacts/sarah-chen is not a link",
3026            Path::new("d.md"),
3027        );
3028        assert!(links.is_empty());
3029    }
3030
3031    // ── extract_markdown_links ───────────────────────────────────────────────
3032
3033    #[test]
3034    fn extract_markdown_links_captures_external_and_not_wiki_links() {
3035        let body =
3036            "See [the thread](https://x.com/a) and [[records/contacts/sarah-chen]] internally.\n";
3037        let md = extract_markdown_links(body, Path::new("d.md"));
3038        assert_eq!(
3039            md.len(),
3040            1,
3041            "wiki-link must not be captured as a markdown link"
3042        );
3043        assert_eq!(md[0].text, "the thread");
3044        assert_eq!(md[0].url, "https://x.com/a");
3045        assert_eq!(md[0].location.1, 1);
3046
3047        // And the wiki-link extractor must not pick up the markdown link.
3048        let wl = extract_wiki_links(body, Path::new("d.md"));
3049        assert_eq!(wl.len(), 1);
3050        assert_eq!(wl[0].target, "records/contacts/sarah-chen");
3051    }
3052
3053    // ── link_fields ──────────────────────────────────────────────────────────
3054
3055    #[test]
3056    fn link_fields_extracts_scalar_list_and_summary_links() {
3057        // The canonical list form quotes each item so YAML parses it as clean
3058        // strings; a scalar field may be quoted OR written in the canonical
3059        // unquoted inline form `company: [[x]]` (SPEC § Linking).
3060        let yaml = "type: meeting\nsummary: with [[records/contacts/elena]]\ncompany: \"[[records/companies/northstar]]\"\nattendees:\n  - \"[[records/contacts/elena]]\"\n  - \"[[records/contacts/sarah]]\"\nnotes: just plain text";
3061        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
3062        // Sanity: company really did parse as a scalar string here.
3063        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_some());
3064        let fields = fm.link_fields();
3065
3066        // company (scalar) once, with the right target.
3067        let company: Vec<&str> = fields
3068            .iter()
3069            .filter(|(k, _)| k == "company")
3070            .map(|(_, l)| l.target.as_str())
3071            .collect();
3072        assert_eq!(company, vec!["records/companies/northstar"]);
3073        // attendees (block list) twice.
3074        let attendees: Vec<&str> = fields
3075            .iter()
3076            .filter(|(k, _)| k == "attendees")
3077            .map(|(_, l)| l.target.as_str())
3078            .collect();
3079        assert_eq!(
3080            attendees,
3081            vec!["records/contacts/elena", "records/contacts/sarah"]
3082        );
3083        // summary link surfaced.
3084        assert_eq!(fields.iter().filter(|(k, _)| k == "summary").count(), 1);
3085        // Plain-text field is not a link.
3086        assert_eq!(fields.iter().filter(|(k, _)| k == "notes").count(), 0);
3087    }
3088
3089    #[test]
3090    fn link_fields_surfaces_canonical_unquoted_scalar_link() {
3091        // Regression: the canonical scalar wiki-link form is the *unquoted*
3092        // inline `company: [[records/companies/northstar]]` (SPEC § Linking).
3093        // YAML parses `[[x]]` as a flow-list-in-a-list (`Seq[Seq[String]]`), so
3094        // a naive `as_str()`-only walk drops it. link_fields() must still
3095        // surface exactly one link with the correct target.
3096        let yaml = "type: meeting\ncompany: [[records/companies/northstar]]";
3097        let fm = Frontmatter::parse(yaml, Path::new("m.md")).unwrap();
3098        // Sanity: it really did parse as the nested sequence form, NOT a string.
3099        assert!(fm.extra.get("company").and_then(|v| v.as_str()).is_none());
3100
3101        let fields = fm.link_fields();
3102        let links: Vec<(&str, &str, Option<&str>)> = fields
3103            .iter()
3104            .map(|(k, l)| (k.as_str(), l.target.as_str(), l.display.as_deref()))
3105            .collect();
3106        assert_eq!(
3107            links,
3108            vec![("company", "records/companies/northstar", None)]
3109        );
3110
3111        // The `|display` segment survives the unquoted inline form too.
3112        let fm2 = Frontmatter::parse(
3113            "type: meeting\ncompany: [[records/companies/northstar|Northstar]]",
3114            Path::new("m.md"),
3115        )
3116        .unwrap();
3117        let f2 = fm2.link_fields();
3118        assert_eq!(f2.len(), 1);
3119        assert_eq!(f2[0].0, "company");
3120        assert_eq!(f2[0].1.target, "records/companies/northstar");
3121        assert_eq!(f2[0].1.display.as_deref(), Some("Northstar"));
3122    }
3123
3124    #[test]
3125    fn link_fields_ignores_plain_one_item_flow_list() {
3126        // A plain one-item flow list `aliases: [foo]` parses to `Seq[String]`
3127        // — one nesting level shallower than an unquoted `[[foo]]` — and must
3128        // NOT be mistaken for a wiki-link.
3129        let yaml = "type: contact\naliases: [foo]";
3130        let fm = Frontmatter::parse(yaml, Path::new("c.md")).unwrap();
3131        assert_eq!(fm.link_fields(), Vec::new());
3132    }
3133
3134    // ── detect_flow_form_link_lists ──────────────────────────────────────────
3135
3136    #[test]
3137    fn detect_flow_form_flags_list_misencodings_not_scalars() {
3138        // The flow-form list mis-encoding (triple-nested) IS flagged; a scalar
3139        // inline wiki-link (double-nested) is NOT.
3140        let bad = "attendees: [[[records/x]], [[records/y]]]\nscalar_inline: [[records/z]]";
3141        let flagged = detect_flow_form_link_lists(bad);
3142        assert_eq!(flagged, vec!["attendees".to_string()]);
3143
3144        // An UNquoted block list is also a mis-encoding (parses triple-nested).
3145        let unquoted_block = "attendees:\n  - [[records/x]]\n  - [[records/y]]";
3146        assert_eq!(
3147            detect_flow_form_link_lists(unquoted_block),
3148            vec!["attendees".to_string()]
3149        );
3150
3151        // The canonical QUOTED block form parses to clean strings — NOT flagged.
3152        let good = "attendees:\n  - \"[[records/x]]\"\n  - \"[[records/y]]\"";
3153        assert!(detect_flow_form_link_lists(good).is_empty());
3154
3155        // A plain scalar list of strings is not flagged.
3156        let plain = "tags: [a, b, c]";
3157        assert!(detect_flow_form_link_lists(plain).is_empty());
3158    }
3159
3160    // ── extract_sections ─────────────────────────────────────────────────────
3161
3162    #[test]
3163    fn extract_sections_levels_nesting_and_boundaries() {
3164        let body = "intro text\n## First\nalpha\n### Sub\nbeta\n## Second\ngamma\n";
3165        let secs = extract_sections(body);
3166        let headings: Vec<(&str, u8)> =
3167            secs.iter().map(|s| (s.heading.as_str(), s.level)).collect();
3168        assert_eq!(headings, vec![("First", 2), ("Sub", 3), ("Second", 2)]);
3169
3170        // "First" (H2) body extends through its H3 child, stopping at "Second".
3171        let first = &secs[0];
3172        assert!(first.body.contains("alpha"));
3173        assert!(first.body.contains("### Sub"));
3174        assert!(first.body.contains("beta"));
3175        assert!(!first.body.contains("Second"));
3176
3177        // "Sub" (H3) stops at the next equal-or-shallower heading ("Second").
3178        let sub = &secs[1];
3179        assert!(sub.body.contains("beta"));
3180        assert!(!sub.body.contains("gamma"));
3181
3182        // 1-based line numbers within the body.
3183        assert_eq!(first.line, 2);
3184        assert_eq!(secs[2].line, 6);
3185    }
3186
3187    #[test]
3188    fn extract_sections_ignores_headings_in_fenced_code() {
3189        let body = "## Real\n```\n## Fake heading in code\n```\nafter\n";
3190        let secs = extract_sections(body);
3191        assert_eq!(secs.len(), 1);
3192        assert_eq!(secs[0].heading, "Real");
3193        // The fenced "## Fake" is part of Real's body, not its own section.
3194        assert!(secs[0].body.contains("## Fake heading in code"));
3195    }
3196
3197    // ── parse_field_spec ─────────────────────────────────────────────────────
3198
3199    #[test]
3200    fn parse_field_spec_required_and_shape() {
3201        let f = parse_field_spec("- email (required, email)");
3202        assert_eq!(f.name, "email");
3203        assert!(f.required);
3204        assert_eq!(f.shape, Some(Shape::Email));
3205        assert!(f.unknown_modifiers.is_empty());
3206    }
3207
3208    #[test]
3209    fn parse_field_spec_link_prefix_strips_trailing_slash() {
3210        let f = parse_field_spec("- company (required, link to records/companies/)");
3211        assert!(f.required);
3212        assert_eq!(f.link_prefix, Some(PathBuf::from("records/companies")));
3213        assert_eq!(f.shape, None);
3214    }
3215
3216    #[test]
3217    fn parse_field_spec_default_preserves_case_and_value() {
3218        let f = parse_field_spec("- currency (default USD)");
3219        assert_eq!(f.name, "currency");
3220        assert_eq!(f.default, Some(Value::String("USD".into())));
3221    }
3222
3223    #[test]
3224    fn parse_field_spec_enum_captures_comma_list_as_last_modifier() {
3225        let f = parse_field_spec("- status (required, enum: open, closed, pending)");
3226        assert!(f.required);
3227        assert_eq!(
3228            f.enum_values,
3229            Some(vec![
3230                "open".to_string(),
3231                "closed".to_string(),
3232                "pending".to_string()
3233            ])
3234        );
3235    }
3236
3237    #[test]
3238    fn parse_field_spec_bare_enum_keyword_is_not_itself_a_value() {
3239        // `enum` with no colon: the values are the remaining tokens; the keyword
3240        // itself must NOT leak in as an allowed value.
3241        let f = parse_field_spec("- status (required, enum, open, closed)");
3242        assert!(f.required);
3243        assert_eq!(
3244            f.enum_values,
3245            Some(vec!["open".to_string(), "closed".to_string()])
3246        );
3247    }
3248
3249    #[test]
3250    fn parse_field_spec_unknown_modifier_is_captured_not_errored() {
3251        let f = parse_field_spec("- weird (required, frobnicate, string)");
3252        assert!(f.required);
3253        assert_eq!(f.shape, Some(Shape::String));
3254        assert_eq!(f.unknown_modifiers, vec!["frobnicate".to_string()]);
3255    }
3256
3257    #[test]
3258    fn parse_field_spec_no_parens_is_freeform_optional() {
3259        let f = parse_field_spec("- nickname");
3260        assert_eq!(f.name, "nickname");
3261        assert!(!f.required);
3262        assert_eq!(f.shape, None);
3263        assert!(f.link_prefix.is_none());
3264        assert!(f.enum_values.is_none());
3265        assert!(f.unknown_modifiers.is_empty());
3266    }
3267
3268    // ── parse_schema_bullet (directives) ─────────────────────────────────────
3269
3270    #[test]
3271    fn schema_bullet_unique_single_field() {
3272        match parse_schema_bullet("- unique: email") {
3273            SchemaBullet::Unique(fields) => assert_eq!(fields, vec!["email".to_string()]),
3274            other => panic!("expected Unique, got {other:?}"),
3275        }
3276    }
3277
3278    #[test]
3279    fn schema_bullet_unique_compound_trims_and_splits() {
3280        match parse_schema_bullet("- unique: date, amount , vendor") {
3281            SchemaBullet::Unique(fields) => assert_eq!(
3282                fields,
3283                vec![
3284                    "date".to_string(),
3285                    "amount".to_string(),
3286                    "vendor".to_string()
3287                ]
3288            ),
3289            other => panic!("expected Unique, got {other:?}"),
3290        }
3291    }
3292
3293    #[test]
3294    fn schema_bullet_summary_template_keeps_braces_and_inner_colons() {
3295        match parse_schema_bullet("- summary_template: {role} at {company} (x: y)") {
3296            SchemaBullet::SummaryTemplate(t) => assert_eq!(t, "{role} at {company} (x: y)"),
3297            other => panic!("expected SummaryTemplate, got {other:?}"),
3298        }
3299    }
3300
3301    #[test]
3302    fn schema_bullet_field_with_enum_modifier_is_not_a_directive() {
3303        // A field whose modifiers contain a colon (`enum:`) parses as a field, not
3304        // a directive — its head has a `(` before any `:`.
3305        match parse_schema_bullet("- status (enum: open, closed)") {
3306            SchemaBullet::Field(f) => {
3307                assert_eq!(f.name, "status");
3308                assert_eq!(
3309                    f.enum_values,
3310                    Some(vec!["open".to_string(), "closed".to_string()])
3311                );
3312            }
3313            other => panic!("expected Field, got {other:?}"),
3314        }
3315    }
3316
3317    #[test]
3318    fn parse_db_md_schema_captures_unique_and_summary_template() {
3319        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### contact\n- email (required, email)\n- unique: email\n- summary_template: {role} at {company}\n";
3320        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
3321        let s = config.schemas.get("contact").expect("contact schema");
3322        assert_eq!(s.fields.len(), 1, "directives are not parsed as fields");
3323        assert_eq!(s.unique_keys, vec![vec!["email".to_string()]]);
3324        assert_eq!(s.summary_template.as_deref(), Some("{role} at {company}"));
3325    }
3326
3327    #[test]
3328    fn schema_bullet_shard_directive_parses_values() {
3329        assert!(matches!(
3330            parse_schema_bullet("- shard: by-date"),
3331            SchemaBullet::Shard(Some(true))
3332        ));
3333        assert!(matches!(
3334            parse_schema_bullet("- shard: flat"),
3335            SchemaBullet::Shard(Some(false))
3336        ));
3337        // An unrecognized value is ignored (None), like an unknown modifier.
3338        assert!(matches!(
3339            parse_schema_bullet("- shard: weekly"),
3340            SchemaBullet::Shard(None)
3341        ));
3342        // A field whose name has a `(` before any `:` is still a field — the same
3343        // guard that keeps `- status (enum: a, b)` a field, not a directive.
3344        assert!(matches!(
3345            parse_schema_bullet("- shardiness (string)"),
3346            SchemaBullet::Field(_)
3347        ));
3348    }
3349
3350    #[test]
3351    fn parse_db_md_schema_captures_shard_directive() {
3352        let db = "---\ntype: db-md\nscope: x\nowner: y\n---\n\n## Schemas\n\n### shipment\n- carrier (string)\n- shard: by-date\n\n### contact\n- shard: flat\n";
3353        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
3354        let shipment = config.schemas.get("shipment").expect("shipment schema");
3355        assert_eq!(shipment.shard, Some(true));
3356        assert_eq!(
3357            shipment.fields.len(),
3358            1,
3359            "`shard:` is a directive, not a field"
3360        );
3361        assert_eq!(config.schemas.get("contact").unwrap().shard, Some(false));
3362    }
3363
3364    // ── parse_db_md ──────────────────────────────────────────────────────────
3365
3366    const CANONICAL_DB_MD: &str = "---\ntype: db-md\nscope: company\nowner: Sarah Chen\n---\n\n# Acme operations knowledge base\n\nCompany-scale institutional memory for Acme.\n\n## Agent instructions\n\nPrioritize creating `contact` records from new-sender emails. Use British English.\n\n## Policies\n\n### Frozen pages\n- `records/decisions/2026-q1-strategy.md` — finalized, do not modify.\n- `records/synthesis/2026-annual-plan.md` — signed-off plan.\n\n### Ignored types\n- `test`, `temp` — read but never synthesize.\n\n## Schemas\n\n### contact\n- name (required)\n- email (required, email)\n- company (required, link to records/companies/)\n- role (string)\n\n### expense\n- date (required, date)\n- amount (required)\n- currency (default USD)\n";
3367
3368    #[test]
3369    fn parse_db_md_extracts_all_canonical_sections() {
3370        let config = parse_db_md(CANONICAL_DB_MD, Path::new("DB.md")).unwrap();
3371
3372        // Agent instructions: free-form prose, heading line stripped.
3373        let ai = config
3374            .agent_instructions
3375            .expect("agent instructions present");
3376        assert!(ai.starts_with("Prioritize creating"));
3377        assert!(!ai.contains("## Agent instructions"));
3378
3379        // Frozen pages: paths extracted from backticked bullets, comments dropped.
3380        assert_eq!(
3381            config.frozen_pages,
3382            vec![
3383                PathBuf::from("records/decisions/2026-q1-strategy.md"),
3384                PathBuf::from("records/synthesis/2026-annual-plan.md"),
3385            ]
3386        );
3387
3388        // Ignored types: comma list, backticks/comment stripped.
3389        assert_eq!(
3390            config.ignored_types,
3391            vec!["test".to_string(), "temp".to_string()]
3392        );
3393
3394        // Schemas: two types, each with its fields in source order.
3395        assert_eq!(config.schemas.len(), 2);
3396        let contact = config.schemas.get("contact").expect("contact schema");
3397        let names: Vec<&str> = contact.fields.iter().map(|f| f.name.as_str()).collect();
3398        assert_eq!(names, vec!["name", "email", "company", "role"]);
3399        assert!(contact.fields[0].required); // name
3400        assert_eq!(contact.fields[1].shape, Some(Shape::Email)); // email
3401        assert_eq!(
3402            contact.fields[2].link_prefix,
3403            Some(PathBuf::from("records/companies"))
3404        ); // company
3405
3406        let expense = config.schemas.get("expense").expect("expense schema");
3407        let cur = expense
3408            .fields
3409            .iter()
3410            .find(|f| f.name == "currency")
3411            .unwrap();
3412        assert_eq!(cur.default, Some(Value::String("USD".into())));
3413    }
3414
3415    #[test]
3416    fn parse_db_md_handles_malformed_and_unknown_modifiers() {
3417        // corpus-b shape: a `## Schemas` section with a malformed bullet, an
3418        // unknown modifier, and bullets that appear with NO `### <type>`
3419        // heading (so they belong to no schema and are dropped).
3420        let text = "---\ntype: db-md\n---\n\n## Schemas\n- orphan (required)\n\n### ticket\n- priority (required, mystery, enum: low, high)\n- broken (\n";
3421        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
3422
3423        // The orphan bullet under `## Schemas` with no `### type` heading is not
3424        // captured as a schema.
3425        assert_eq!(config.schemas.len(), 1);
3426        let ticket = config.schemas.get("ticket").expect("ticket schema");
3427        assert_eq!(ticket.fields.len(), 2);
3428
3429        let priority = &ticket.fields[0];
3430        assert!(priority.required);
3431        assert_eq!(priority.unknown_modifiers, vec!["mystery".to_string()]);
3432        assert_eq!(
3433            priority.enum_values,
3434            Some(vec!["low".to_string(), "high".to_string()])
3435        );
3436
3437        // A bullet with an unclosed paren still yields a usable name.
3438        let broken = &ticket.fields[1];
3439        assert_eq!(broken.name, "broken");
3440    }
3441
3442    #[test]
3443    fn parse_db_md_missing_frontmatter_errors() {
3444        let text = "# No frontmatter\n\n## Agent instructions\nhi\n";
3445        let err = parse_db_md(text, Path::new("DB.md")).unwrap_err();
3446        assert!(matches!(err, ParseError::MissingFrontmatter { .. }));
3447    }
3448
3449    #[test]
3450    fn parse_db_md_absent_sections_default_empty() {
3451        let text = "---\ntype: db-md\n---\n\n# Title only\n";
3452        let config = parse_db_md(text, Path::new("DB.md")).unwrap();
3453        assert_eq!(config, Config::default());
3454    }
3455
3456    // ── fm set / --fm list-valued link fields (meeting.attendees & friends) ──
3457
3458    /// `Frontmatter::set` is the value path every write surface (`fm set`,
3459    /// `write --fm`) funnels through. A list-of-wiki-links value (the SPEC's
3460    /// `meeting.attendees` shape) must serialize as a YAML **block sequence** of
3461    /// quoted links — readable back by [`links_in_field_value`] and accepted by
3462    /// `dbmd validate` — never the flow-form scalar string that trips
3463    /// `WIKI_LINK_FLOW_FORM_LIST`. Both the unquoted (`[[[a]], [[b]]]`) and
3464    /// quoted (`["[[a]]", "[[b]]"]`) spellings an agent types must normalize.
3465    #[test]
3466    fn set_list_of_wiki_links_becomes_block_sequence_both_spellings() {
3467        for value in [
3468            "[[[records/contacts/a]], [[records/contacts/b]]]",
3469            r#"["[[records/contacts/a]]", "[[records/contacts/b]]"]"#,
3470        ] {
3471            let mut fm = Frontmatter::default();
3472            fm.set("attendees", value).unwrap();
3473
3474            // Stored as a 2-element sequence of clean quoted links.
3475            let stored = fm.extra.get("attendees").expect("attendees set");
3476            let Value::Sequence(items) = stored else {
3477                panic!("attendees must be a Sequence, got {stored:?} for input {value}");
3478            };
3479            assert_eq!(items.len(), 2, "input {value}");
3480            assert_eq!(items[0], Value::String("[[records/contacts/a]]".into()));
3481            assert_eq!(items[1], Value::String("[[records/contacts/b]]".into()));
3482
3483            // The edge enumerator reads exactly the two links back (no stray
3484            // bracket targets, the flow-form-string symptom).
3485            let links: Vec<_> = links_in_field_value(stored)
3486                .into_iter()
3487                .map(|l| l.target)
3488                .collect();
3489            assert_eq!(
3490                links,
3491                vec!["records/contacts/a", "records/contacts/b"],
3492                "input {value}"
3493            );
3494
3495            // And the canonical writer renders it block-style, not as a scalar.
3496            let yaml = fm.to_yaml();
3497            assert!(
3498                yaml.contains("attendees:\n"),
3499                "expected block list in:\n{yaml}"
3500            );
3501            assert!(
3502                !yaml.contains("attendees: '[["),
3503                "must not be a flow-form scalar string in:\n{yaml}"
3504            );
3505        }
3506    }
3507
3508    /// A *single* inline wiki-link stays a scalar string (renders inline
3509    /// `field: [[x]]`), and a single link must never be widened to a one-item
3510    /// list — preserving the common `contact.company` / `expense.vendor` shape.
3511    #[test]
3512    fn set_single_inline_wiki_link_stays_scalar() {
3513        let mut fm = Frontmatter::default();
3514        fm.set("company", "[[records/companies/tideform]]").unwrap();
3515        assert_eq!(
3516            fm.extra.get("company"),
3517            Some(&Value::String("[[records/companies/tideform]]".into())),
3518        );
3519        // Still recognized as one link.
3520        let links: Vec<_> = links_in_field_value(fm.extra.get("company").unwrap())
3521            .into_iter()
3522            .map(|l| l.target)
3523            .collect();
3524        assert_eq!(links, vec!["records/companies/tideform"]);
3525    }
3526
3527    /// Plain text and a non-link flow list are left as verbatim scalar strings —
3528    /// the list normalization only triggers when every item is a clean wiki-link.
3529    #[test]
3530    fn set_non_link_values_stay_scalar_strings() {
3531        let mut fm = Frontmatter::default();
3532        fm.set("location", "Video call (remote)").unwrap();
3533        assert_eq!(
3534            fm.extra.get("location"),
3535            Some(&Value::String("Video call (remote)".into())),
3536        );
3537
3538        // A flow list whose items are NOT wiki-links must not be reinterpreted as
3539        // a link sequence; it stays the scalar string the agent passed.
3540        fm.set("note", "[draft, wip]").unwrap();
3541        assert_eq!(
3542            fm.extra.get("note"),
3543            Some(&Value::String("[draft, wip]".into()))
3544        );
3545    }
3546
3547    // ── Regression: non-string YAML keys round-trip (no Rust Debug corruption) ─
3548
3549    #[test]
3550    fn regression_non_string_yaml_keys_keep_their_text_on_round_trip() {
3551        // A numeric/bool/null/float frontmatter key is valid YAML and must NOT be
3552        // rewritten to its Rust `Debug` form (`Number(2026)`, `Bool(true)`,
3553        // `'Null'`). After the fix the key text survives (the key narrows to a
3554        // string-typed key, but the operator's data is no longer corrupted).
3555        let yaml = "type: note\n2026: planning notes\ntrue: yes-key\n3.14: f\n";
3556        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
3557        // Keys are stored as their scalar text, not the Debug string.
3558        assert!(fm.extra.contains_key("2026"), "numeric key text lost");
3559        assert!(fm.extra.contains_key("true"), "bool key text lost");
3560        assert!(fm.extra.contains_key("3.14"), "float key text lost");
3561        assert!(!fm.extra.keys().any(|k| k.starts_with("Number(")));
3562        assert!(!fm.extra.keys().any(|k| k.starts_with("Bool(")));
3563
3564        // And a re-emit never produces the Debug forms on disk.
3565        let out = fm.to_yaml();
3566        assert!(!out.contains("Number("), "Debug-form key emitted:\n{out}");
3567        assert!(!out.contains("Bool("), "Debug-form key emitted:\n{out}");
3568        // The key text is still present (quoted, since it now reads as a string).
3569        assert!(out.contains("2026"), "numeric key dropped:\n{out}");
3570        assert!(out.contains("planning notes"), "value dropped:\n{out}");
3571    }
3572
3573    // ── Regression: universal-key sequence/mapping values are preserved (#2) ───
3574
3575    #[test]
3576    fn regression_universal_key_non_scalar_value_is_preserved_not_deleted() {
3577        // A universal key carrying a sequence/mapping (`status: [active, draft]`)
3578        // is not a valid scalar for that field. Before the fix, the matched arm
3579        // consumed-and-dropped it (scalar_string -> None) and `to_yaml` then
3580        // omitted the field — `dbmd format` silently DELETED it. It must now pass
3581        // through `extra` and re-emit verbatim.
3582        let yaml = "type: note\nstatus:\n  - active\n  - draft\nsummary:\n  a: 1\n  b: 2\n";
3583        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
3584        // The typed accessors stay None (no valid scalar), but the data lives in
3585        // extra so nothing is lost.
3586        assert!(fm.status.is_none());
3587        assert!(fm.summary.is_none());
3588        assert!(fm.extra.contains_key("status"), "status value destroyed");
3589        assert!(fm.extra.contains_key("summary"), "summary value destroyed");
3590
3591        // A re-emit keeps both fields' data on disk.
3592        let out = fm.to_yaml();
3593        assert!(out.contains("status"), "status deleted on re-emit:\n{out}");
3594        assert!(out.contains("active"), "status items deleted:\n{out}");
3595        assert!(
3596            out.contains("summary"),
3597            "summary deleted on re-emit:\n{out}"
3598        );
3599
3600        // Round-trips as a fixed point — repeated curator-loop writes don't lose
3601        // the data.
3602        let reparsed = Frontmatter::parse(&out, Path::new("x.md")).unwrap();
3603        assert!(reparsed.extra.contains_key("status"));
3604        assert!(reparsed.extra.contains_key("summary"));
3605    }
3606
3607    // ── Regression: non-scalar tags items don't erase the tags field (#5) ──────
3608
3609    #[test]
3610    fn regression_non_scalar_tags_value_is_preserved_not_erased() {
3611        // `tags: [[vip]]` (an authoring slip — wiki-link brackets around a tag)
3612        // parses to a nested sequence; before the fix `parse_tags` filtered the
3613        // non-scalar item out and `to_yaml` then omitted the now-empty tags vec,
3614        // silently DELETING the tags line. It must now survive the re-emit (the
3615        // key data is preserved; the field is never dropped).
3616        let yaml = "type: note\ntags: [[vip]]\n";
3617        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
3618        // The typed tags vec is empty (no clean scalar list), but the raw value
3619        // is preserved in extra so nothing is destroyed.
3620        assert!(fm.tags.is_empty());
3621        assert!(fm.extra.contains_key("tags"), "tags value destroyed");
3622
3623        let out = fm.to_yaml();
3624        assert!(out.contains("tags"), "tags deleted on re-emit:\n{out}");
3625        // The `vip` text survives on disk in some form (never erased).
3626        assert!(out.contains("vip"), "tag content erased:\n{out}");
3627
3628        // A clean tag list still parses to the typed vec (not regressed).
3629        let clean =
3630            Frontmatter::parse("type: note\ntags: [vip, renewal]\n", Path::new("x.md")).unwrap();
3631        assert_eq!(clean.tags, vec!["vip".to_string(), "renewal".to_string()]);
3632        assert!(!clean.extra.contains_key("tags"));
3633    }
3634
3635    // ── Regression: plain nested string lists are NOT fabricated into links (#3) ─
3636
3637    #[test]
3638    fn regression_plain_nested_string_list_is_not_turned_into_wiki_links() {
3639        // `groups: [[alpha], [beta]]` is the data [["alpha"],["beta"]] — an
3640        // unknown nested string list that must pass through verbatim. Before the
3641        // fix, canonicalize_extra_value fabricated `- '[[alpha]]'` / `- '[[beta]]'`
3642        // (short-form links the tool then flagged), changing the field's type.
3643        let yaml = "type: note\ngroups: [[alpha], [beta]]\n";
3644        let fm = Frontmatter::parse(yaml, Path::new("x.md")).unwrap();
3645        let before = fm.extra.get("groups").cloned();
3646
3647        let out = fm.to_yaml();
3648        // No fabricated wiki-link brackets in the emitted YAML.
3649        assert!(!out.contains("[[alpha]]"), "fabricated a wiki-link:\n{out}");
3650        assert!(!out.contains("[[beta]]"), "fabricated a wiki-link:\n{out}");
3651
3652        // The value is unchanged across the canonical re-emit.
3653        let reparsed = Frontmatter::parse(&out, Path::new("x.md")).unwrap();
3654        assert_eq!(
3655            reparsed.extra.get("groups"),
3656            before.as_ref(),
3657            "nested string list mutated by canonicalize_extra_value"
3658        );
3659        // And it surfaces no links.
3660        assert!(reparsed.link_fields().is_empty());
3661    }
3662
3663    // ── Regression: fence-line trailing whitespace is tolerated (#4) ───────────
3664
3665    #[test]
3666    fn regression_split_frontmatter_tolerates_trailing_whitespace_on_fences() {
3667        // A fence written `--- ` (trailing space — invisible in editors) is
3668        // indexed/validated clean by index.rs/validate.rs (both use `trim_end()`)
3669        // but, before the fix, hard-failed every read/edit surface routed through
3670        // `split_frontmatter`. All three must now agree.
3671        let text = "--- \ntype: note\nsummary: x\n---\t\nbody\n";
3672        let parsed = split_frontmatter(text, Path::new("f.md")).unwrap();
3673        assert_eq!(parsed.frontmatter_yaml, "type: note\nsummary: x\n");
3674        assert_eq!(parsed.body, "body\n");
3675
3676        // End to end through read_file's parse.
3677        let fm = Frontmatter::parse(&parsed.frontmatter_yaml, Path::new("f.md")).unwrap();
3678        assert_eq!(fm.type_.as_deref(), Some("note"));
3679    }
3680
3681    // ── Regression: CommonMark trailing-'#' heading rule (#6) ──────────────────
3682
3683    #[test]
3684    fn regression_heading_text_keeps_abutting_hash_drops_closing_sequence() {
3685        // `## C#` → `C#` (the `#` abuts content, not a closing sequence).
3686        assert_eq!(heading_text("## C#", 2), "C#");
3687        assert_eq!(heading_text("## F#", 2), "F#");
3688        assert_eq!(heading_text("## issue-123#", 2), "issue-123#");
3689        // A genuine ATX closing sequence (space before the `#` run) is dropped.
3690        assert_eq!(heading_text("## Title ##", 2), "Title");
3691        assert_eq!(heading_text("## Title #", 2), "Title");
3692        // All-hashes content collapses to empty.
3693        assert_eq!(heading_text("## ##", 2), "");
3694        // No trailing hashes — unchanged.
3695        assert_eq!(heading_text("## Plain", 2), "Plain");
3696    }
3697
3698    #[test]
3699    fn regression_extract_sections_keeps_csharp_heading_and_schema_type_binds() {
3700        // `dbmd sections` must report `C#`, not `C`.
3701        let secs = extract_sections("## C#\nbody\n");
3702        assert_eq!(secs.len(), 1);
3703        assert_eq!(secs[0].heading, "C#");
3704
3705        // And a `### c#` schema must register under `c#`, not `c`.
3706        let db = "---\ntype: db-md\n---\n\n## Schemas\n\n### c#\n- name (required)\n";
3707        let config = parse_db_md(db, Path::new("DB.md")).unwrap();
3708        assert!(
3709            config.schemas.contains_key("c#"),
3710            "schema bound to wrong key"
3711        );
3712        assert!(!config.schemas.contains_key("c"));
3713    }
3714
3715    // ── Regression: section line numbers offset by the frontmatter block (#7) ──
3716
3717    #[test]
3718    fn regression_extract_sections_in_file_reports_source_line_numbers() {
3719        // A heading on file line 6 (after a 4-line frontmatter block + 1 body
3720        // line) must be reported as L6, not the body-relative L2.
3721        let text = "---\ntype: note\nsummary: x\n---\nbody line\n## Heading\nmore\n";
3722        let secs = extract_sections_in_file(text);
3723        assert_eq!(secs.len(), 1);
3724        assert_eq!(secs[0].heading, "Heading");
3725        assert_eq!(secs[0].line, 6, "section line not offset by frontmatter");
3726
3727        // The body-relative helper is unchanged (validate relies on that frame).
3728        let body_secs = extract_sections("body line\n## Heading\nmore\n");
3729        assert_eq!(body_secs[0].line, 2);
3730
3731        // No frontmatter: whole text is body, no offset.
3732        let plain = extract_sections_in_file("## Top\nx\n## Next\n");
3733        assert_eq!(plain[0].line, 1);
3734        assert_eq!(plain[1].line, 3);
3735    }
3736
3737    // ── Regression: colon-form schema field bullet parses modifiers (#8) ───────
3738
3739    #[test]
3740    fn regression_colon_form_field_bullet_parses_modifiers() {
3741        // `- title: string, required` is the natural mis-spelling of
3742        // `- title (string, required)`; before the fix the whole text became the
3743        // field name and every modifier was silently lost.
3744        let f = parse_field_spec("- title: string, required");
3745        assert_eq!(f.name, "title");
3746        assert!(f.required, "required modifier lost on colon-form");
3747        assert_eq!(f.shape, Some(Shape::String));
3748
3749        // Through the schema-bullet classifier (the real path), it is a Field.
3750        match parse_schema_bullet("- title: string, required") {
3751            SchemaBullet::Field(f) => {
3752                assert_eq!(f.name, "title");
3753                assert!(f.required);
3754                assert_eq!(f.shape, Some(Shape::String));
3755            }
3756            other => panic!("expected Field, got {other:?}"),
3757        }
3758
3759        // A paren form whose modifiers contain a colon still parses by parens.
3760        let g = parse_field_spec("- status (enum: open, closed)");
3761        assert_eq!(g.name, "status");
3762        assert_eq!(
3763            g.enum_values,
3764            Some(vec!["open".to_string(), "closed".to_string()])
3765        );
3766    }
3767
3768    // ── Regression: comma inside a `default` value is preserved (#9) ───────────
3769
3770    #[test]
3771    fn regression_default_value_preserves_internal_commas() {
3772        let f = parse_field_spec("- title (default Director, Operations)");
3773        assert_eq!(
3774            f.default,
3775            Some(Value::String("Director, Operations".into())),
3776            "comma-bearing default truncated"
3777        );
3778
3779        let g = parse_field_spec("- region (default North America, EMEA fallback)");
3780        assert_eq!(
3781            g.default,
3782            Some(Value::String("North America, EMEA fallback".into()))
3783        );
3784
3785        // A single-token default still works (no regression).
3786        let h = parse_field_spec("- currency (default USD)");
3787        assert_eq!(h.default, Some(Value::String("USD".into())));
3788    }
3789
3790    // ── Regression: a `default` after `enum` is parsed, not swallowed (#10) ────
3791
3792    #[test]
3793    fn regression_default_after_enum_is_parsed_not_an_enum_member() {
3794        let f = parse_field_spec("- status (enum: open, closed, default open)");
3795        assert_eq!(
3796            f.enum_values,
3797            Some(vec!["open".to_string(), "closed".to_string()]),
3798            "`default open` leaked into the enum list"
3799        );
3800        assert_eq!(
3801            f.default,
3802            Some(Value::String("open".into())),
3803            "default after enum was dropped"
3804        );
3805
3806        // The bare `enum` keyword form, with a trailing default.
3807        let g = parse_field_spec("- status (enum, open, closed, default open)");
3808        assert_eq!(
3809            g.enum_values,
3810            Some(vec!["open".to_string(), "closed".to_string()])
3811        );
3812        assert_eq!(g.default, Some(Value::String("open".into())));
3813    }
3814
3815    // ── Regression: frozen-page policy does not fail open (#11) ────────────────
3816
3817    #[test]
3818    fn regression_frozen_match_handles_leading_slash() {
3819        let cfg = Config {
3820            frozen_pages: vec![PathBuf::from("/records/decisions/q1.md")],
3821            ..Config::default()
3822        };
3823        assert!(
3824            cfg.is_frozen(Path::new("records/decisions/q1.md")),
3825            "leading-slash entry failed open"
3826        );
3827        assert!(cfg.is_frozen(Path::new("records/decisions/q1")));
3828    }
3829
3830    #[test]
3831    fn regression_frozen_match_supports_globs() {
3832        let cfg = Config {
3833            frozen_pages: vec![PathBuf::from("records/decisions/*")],
3834            ..Config::default()
3835        };
3836        assert!(
3837            cfg.is_frozen(Path::new("records/decisions/q1.md")),
3838            "glob entry failed to protect a concrete file"
3839        );
3840        assert!(cfg.is_frozen(Path::new("records/decisions/q2.md")));
3841        // The glob does not cross a `/` segment.
3842        assert!(!cfg.is_frozen(Path::new("records/decisions/sub/q1.md")));
3843        // `**` crosses segments.
3844        let deep = Config {
3845            frozen_pages: vec![PathBuf::from("records/**")],
3846            ..Config::default()
3847        };
3848        assert!(deep.is_frozen(Path::new("records/decisions/sub/q1.md")));
3849        assert!(deep.is_frozen(Path::new("records/x.md")));
3850        assert!(!deep.is_frozen(Path::new("sources/x.md")));
3851        // A `*.md`-style intra-segment glob.
3852        let suffix = Config {
3853            frozen_pages: vec![PathBuf::from("records/decisions/q*")],
3854            ..Config::default()
3855        };
3856        assert!(suffix.is_frozen(Path::new("records/decisions/q1.md")));
3857        assert!(!suffix.is_frozen(Path::new("records/decisions/draft.md")));
3858    }
3859
3860    #[test]
3861    fn regression_frozen_entry_single_hyphen_comment_is_stripped() {
3862        // `records/decisions/q3.md - finalized` (single ASCII hyphen comment, no
3863        // backticks): the comment must be stripped so the entry is just the path.
3864        let path = extract_path_bullet("- records/decisions/q3.md - finalized");
3865        assert_eq!(path, "records/decisions/q3.md");
3866
3867        // End to end: such a bullet freezes the file.
3868        let cfg = Config {
3869            frozen_pages: vec![PathBuf::from(extract_path_bullet(
3870                "- records/decisions/q3.md - finalized",
3871            ))],
3872            ..Config::default()
3873        };
3874        assert!(
3875            cfg.is_frozen(Path::new("records/decisions/q3.md")),
3876            "single-hyphen-comment entry failed open"
3877        );
3878    }
3879}
dbmd_core/parser.rs

dbmd_core/
parser.rs