Skip to main content

dbmd_core/
summary.rs

1//! `summary` — the deterministic default-`summary` composer.
2//!
3//! Used by `dbmd fm init` and `dbmd write` when the agent doesn't supply a
4//! `summary`. [`compose_default`] renders the type's `summary_template` (from
5//! the store's `DB.md ## Schemas`) when one is declared, and otherwise falls
6//! back to the body's first non-heading paragraph. No type carries a built-in
7//! template — the template, like the schema, is the store's to declare.
8//!
9//! Contract: **deterministic** (same `(type, frontmatter, body)` → same
10//! string), **single-line** (newlines collapsed to spaces), and **capped at 200
11//! chars** (the SPEC readability bound). The tool generates a deterministic
12//! floor; the agent provides the ceiling via `dbmd fm set <file> summary='…'`.
13
14use serde_norway::Value;
15
16use crate::parser::Frontmatter;
17use crate::store::Store;
18
/// The SPEC's `summary` length bound, in characters.
///
/// Counted in `char`s (Unicode scalar values), not bytes: [`normalize`] passes
/// this value to a char-based truncation when producing the tool-generated
/// default summary.
pub const MAX_SUMMARY_LEN: usize = 200;
21
22/// Compose a deterministic default `summary` for a file from its `type`,
23/// frontmatter, and body. If the store's `## Schemas` declares a
24/// `summary_template` for the type, it is rendered with `{field}` interpolation;
25/// otherwise the default is the body's first non-heading paragraph. The result
26/// is always single-line and ≤ [`MAX_SUMMARY_LEN`] chars.
27///
28/// The tool generates a deterministic floor; the agent provides the ceiling via
29/// `dbmd fm set <file> summary='…'`.
30pub fn compose_default(
31    store: &Store,
32    type_: &str,
33    frontmatter: &Frontmatter,
34    body: &str,
35) -> crate::Result<String> {
36    let composed = match store
37        .config
38        .schemas
39        .get(type_)
40        .and_then(|s| s.summary_template.as_deref())
41    {
42        Some(template) => render_template(template, frontmatter),
43        None => compose_from_body(body),
44    };
45    Ok(normalize(&composed))
46}
47
48/// Render a `summary_template` — substitute each `{field}` with the file's
49/// frontmatter value for `field`. A scalar (incl. a wiki-link, reduced to its
50/// display-or-leaf form) renders inline; a list renders comma-joined; an
51/// absent/empty field renders empty. An unmatched `{` is emitted verbatim
52/// (templates are simple field-interpolation floors, not a templating language).
53fn render_template(template: &str, fm: &Frontmatter) -> String {
54    let mut out = String::with_capacity(template.len());
55    let mut rest = template;
56    while let Some(open) = rest.find('{') {
57        out.push_str(&rest[..open]);
58        let after = &rest[open + 1..];
59        let close = after.find('}');
60        let next_open = after.find('{');
61        match close {
62            // A clean `{field}` — no nested `{` before the closing `}`.
63            Some(c) if next_open.is_none_or(|n| n > c) => {
64                let key = after[..c].trim();
65                if let Some(scalar) = field_text(fm, key) {
66                    out.push_str(&scalar);
67                } else {
68                    let list = list_field_texts(fm, key);
69                    if !list.is_empty() {
70                        out.push_str(&list.join(", "));
71                    }
72                }
73                rest = &after[c + 1..];
74            }
75            // A stray `{` (no `}`, or another `{` first) — emit it verbatim.
76            _ => {
77                out.push('{');
78                rest = after;
79            }
80        }
81    }
82    out.push_str(rest);
83    out
84}
85
86/// The body fallback: the file's first non-heading paragraph, truncated to
87/// [`MAX_SUMMARY_LEN`] chars (the truncation is applied by [`normalize`]).
88pub fn compose_from_body(body: &str) -> String {
89    first_paragraph(body).unwrap_or_default()
90}
91
/// Enforce only the **single-line** half of the summary contract: every run of
/// whitespace (newlines and tabs included) becomes one space and the ends are
/// trimmed. The length is **never** truncated.
///
/// This is the normalization applied to an explicit agent-supplied `--summary`
/// (`dbmd write` / `dbmd fm init`): the value must satisfy `SUMMARY_MULTILINE`
/// without losing any of the agent's content, in line with the `dbmd fm set`
/// path (which keeps the value verbatim) and the SPEC stance that the agent
/// provides the ceiling. An over-long value is reported by the validator as a
/// `SUMMARY_TOO_LONG` *warning* rather than being silently cut.
pub fn collapse_whitespace(candidate: &str) -> String {
    let mut single_line = String::with_capacity(candidate.len());
    // `split_whitespace` yields only the non-empty words between runs of
    // Unicode whitespace, so re-joining with one space both collapses interior
    // runs and drops leading/trailing whitespace.
    for word in candidate.split_whitespace() {
        if !single_line.is_empty() {
            single_line.push(' ');
        }
        single_line.push_str(word);
    }
    single_line
}
107
108/// Normalize a candidate summary to the full deterministic-**floor** contract:
109/// collapse whitespace (via [`collapse_whitespace`]) then truncate to
110/// [`MAX_SUMMARY_LEN`] **chars** (never splitting a UTF-8 codepoint). Used by
111/// [`compose_default`] for the tool-generated floor. Explicit agent summaries
112/// go through [`collapse_whitespace`] instead, so they are never silently cut.
113pub fn normalize(candidate: &str) -> String {
114    truncate_chars(&collapse_whitespace(candidate), MAX_SUMMARY_LEN)
115}
116
117// ── Internal helpers ────────────────────────────────────────────────────────
118
/// Keep at most the first `max` Unicode scalar values of `s`. The cut always
/// falls on a char boundary; a string with `max` or fewer chars is returned
/// whole.
fn truncate_chars(s: &str, max: usize) -> String {
    // Byte offset at which the char with index `max` starts, or the end of the
    // string when there is no such char.
    let cut = s
        .char_indices()
        .map(|(offset, _)| offset)
        .nth(max)
        .unwrap_or(s.len());
    s[..cut].to_owned()
}
126
127/// Read a frontmatter field's raw YAML value, checking the universal typed
128/// fields first and then [`Frontmatter::extra`] — mirroring the documented
129/// contract of `Frontmatter::get` but reading the struct directly so this
130/// module never depends on another module's body.
131fn field_value(fm: &Frontmatter, key: &str) -> Option<Value> {
132    match key {
133        "type" => fm.type_.clone().map(Value::String),
134        "id" => fm.id.clone().map(Value::String),
135        "summary" => fm.summary.clone().map(Value::String),
136        "status" => fm.status.clone().map(Value::String),
137        // Typed universal fields a `summary_template` may legitimately
138        // interpolate. `created`/`updated` render as their canonical RFC3339
139        // string; `tags` as a sequence (which `list_field_texts` comma-joins).
140        // Without these arms, `{created}` / `{updated}` / `{tags}` would
141        // silently render empty even when the value is present.
142        "created" => fm.created.map(|t| Value::String(t.to_rfc3339())),
143        "updated" => fm.updated.map(|t| Value::String(t.to_rfc3339())),
144        "tags" => {
145            if fm.tags.is_empty() {
146                None
147            } else {
148                Some(Value::Sequence(
149                    fm.tags.iter().cloned().map(Value::String).collect(),
150                ))
151            }
152        }
153        _ => fm.extra.get(key).cloned(),
154    }
155}
156
157/// Read a single frontmatter field as a rendered plain-text scalar, or `None`
158/// when the field is absent, null, or renders empty. Wiki-link-valued fields
159/// are reduced to their display-or-leaf human form (never the raw `[[...]]`).
160fn field_text(fm: &Frontmatter, key: &str) -> Option<String> {
161    let v = field_value(fm, key)?;
162    let rendered = render_scalar(&v)?;
163    let trimmed = rendered.trim();
164    if trimmed.is_empty() {
165        None
166    } else {
167        Some(trimmed.to_string())
168    }
169}
170
171/// Read a list-valued frontmatter field as rendered plain-text items. A scalar
172/// (non-sequence) value is treated as a single-item list. Wiki-link items are
173/// reduced to their display-or-leaf form. Empty / null items are dropped.
174fn list_field_texts(fm: &Frontmatter, key: &str) -> Vec<String> {
175    let Some(v) = field_value(fm, key) else {
176        return Vec::new();
177    };
178    match v {
179        Value::Sequence(items) => items
180            .iter()
181            .filter_map(|item| {
182                let r = render_scalar(item)?;
183                let t = r.trim();
184                if t.is_empty() {
185                    None
186                } else {
187                    Some(t.to_string())
188                }
189            })
190            .collect(),
191        other => render_scalar(&other)
192            .map(|r| r.trim().to_string())
193            .filter(|t| !t.is_empty())
194            .into_iter()
195            .collect(),
196    }
197}
198
199/// Render a single YAML scalar to plain display text. Strings (including YAML
200/// date scalars, which deserialize as strings) are returned as-is but with any
201/// wiki-link reduced to display-or-leaf; numbers and bools stringify
202/// canonically; null / mapping / nested-sequence yield `None`.
203fn render_scalar(v: &Value) -> Option<String> {
204    match v {
205        Value::String(s) => Some(reduce_wiki_link(s)),
206        Value::Sequence(_) => render_unquoted_wiki_link(v),
207        Value::Bool(b) => Some(b.to_string()),
208        Value::Number(n) => {
209            // Render integers without a trailing `.0`; keep the natural form
210            // otherwise. `Number`'s Display already does this.
211            Some(n.to_string())
212        }
213        Value::Null | Value::Mapping(_) | Value::Tagged(_) => None,
214    }
215}
216
217/// YAML parses an unquoted wiki-link scalar (`company: [[records/x]]`) as a
218/// nested sequence, not a string. Recognize that shape so summary templates
219/// render it exactly like the quoted scalar form.
220fn render_unquoted_wiki_link(v: &Value) -> Option<String> {
221    let Value::Sequence(outer) = v else {
222        return None;
223    };
224    if outer.len() != 1 {
225        return None;
226    }
227    let Value::Sequence(inner) = &outer[0] else {
228        return None;
229    };
230    let [Value::String(target)] = inner.as_slice() else {
231        return None;
232    };
233    Some(reduce_wiki_link(&format!("[[{target}]]")))
234}
235
/// Reduce a wiki-link to its human-readable form.
///
/// When the whole (trimmed) value is exactly one `[[target]]` or
/// `[[target|display]]` link, the result is the non-empty `display` text if
/// given, otherwise the last `/`-separated segment of `target` with a trailing
/// `.md` removed. Every other string is returned unchanged (untrimmed).
fn reduce_wiki_link(s: &str) -> String {
    let link_body = match s.trim().strip_prefix("[[") {
        Some(after_open) => match after_open.strip_suffix("]]") {
            // A value such as `[[a]] and [[b]]` also starts with `[[` and ends
            // with `]]`, yet what lies between (`a]] and [[b`) is not a single
            // link. Reducing it would keep only a fragment of the last path and
            // lose the first link plus the connecting text, so multi-link /
            // mixed values are passed through untouched.
            Some(body) if !body.contains("[[") && !body.contains("]]") => body,
            _ => return s.to_string(),
        },
        None => return s.to_string(),
    };

    // `target|display`: a non-blank display override wins.
    let (target, label) = match link_body.find('|') {
        Some(bar) => (&link_body[..bar], link_body[bar + 1..].trim()),
        None => (link_body, ""),
    };
    if !label.is_empty() {
        return label.to_string();
    }

    // Otherwise use the leaf path segment, minus any `.md` extension.
    let target = target.trim();
    let leaf = match target.rfind('/') {
        Some(slash) => &target[slash + 1..],
        None => target,
    }
    .trim();
    match leaf.strip_suffix(".md") {
        Some(stem) => stem.to_string(),
        None => leaf.to_string(),
    }
}
270
271/// The first non-heading, non-blank paragraph of the body: consecutive
272/// non-heading text lines joined by a space, starting at the first such line.
273///
274/// Heading lines are skipped before the paragraph and terminate it once started.
275/// "Heading" follows CommonMark, not "starts with `#`":
276///
277/// - **ATX** (`# Title`) requires a space (or end of line) after the `#` run
278///   ([`is_atx_heading`]); `#1 priority…` / `#hashtag` are prose, not headings.
279/// - **Setext** (a text line followed by an all-`=` or all-`-` underline) is a
280///   heading too; both the title line and its underline are skipped.
281/// - A leading **fenced code block** (```` ``` ````…```` ``` ````) is skipped in
282///   full, so the fence info-string (`` ```bash ``) and any `#`-comment inside
283///   the fence are never mistaken for prose or an ATX heading.
284///
285/// `None` when the body has no prose paragraph.
286fn first_paragraph(body: &str) -> Option<String> {
287    let lines: Vec<&str> = body.lines().collect();
288    let mut collected: Vec<&str> = Vec::new();
289    let mut i = 0;
290    while i < lines.len() {
291        let raw = lines[i];
292        let t = raw.trim();
293
294        // A fenced code block opening (```` ``` ```` or `~~~`, optional info
295        // string) before any prose is skipped wholesale up to its closing fence.
296        if collected.is_empty() {
297            if let Some(fence) = code_fence_marker(t) {
298                i += 1;
299                while i < lines.len() {
300                    let inner = lines[i].trim();
301                    i += 1;
302                    if closes_code_fence(inner, fence) {
303                        break;
304                    }
305                }
306                continue;
307            }
308        }
309
310        if t.is_empty() {
311            if collected.is_empty() {
312                // Still searching for the start of the first paragraph.
313                i += 1;
314                continue;
315            }
316            // Blank line ends the first paragraph.
317            break;
318        }
319
320        // ATX heading (CommonMark: `#`-run then space or EOL).
321        if is_atx_heading(t) {
322            if collected.is_empty() {
323                i += 1;
324                continue;
325            }
326            break;
327        }
328
329        // Setext heading: this line is the title and the NEXT non-empty line is
330        // an all-`=` or all-`-` underline. Only valid as the FIRST line of a
331        // paragraph (an underline mid-paragraph is not a setext heading). When
332        // recognized, skip both the title and the underline.
333        if collected.is_empty() {
334            if let Some(next) = lines.get(i + 1).map(|l| l.trim()) {
335                if is_setext_underline(next) {
336                    i += 2;
337                    continue;
338                }
339            }
340        }
341
342        collected.push(t);
343        i += 1;
344    }
345    if collected.is_empty() {
346        None
347    } else {
348        Some(collected.join(" "))
349    }
350}
351
/// Whether `line` (already trimmed by the caller) is a CommonMark ATX heading:
/// a run of 1–6 `#` characters that is followed by a space, a tab, or nothing
/// at all. `#1 priority` and `#hashtag` are therefore prose (no space after the
/// hashes), and a run of seven or more `#` is not a heading either.
fn is_atx_heading(line: &str) -> bool {
    let after_hashes = line.trim_start_matches('#');
    // `#` is a single byte, so the byte difference is the length of the run.
    let level = line.len() - after_hashes.len();
    if !(1..=6).contains(&level) {
        return false;
    }
    // Either the line is only hashes (bare `###`) or whitespace follows them
    // (`### Title`).
    matches!(after_hashes.chars().next(), None | Some(' ') | Some('\t'))
}
366
/// The fence marker char (`` ` `` or `~`) if `line` (already trimmed) opens a
/// fenced code block: at least three of the same fence char, optionally followed
/// by an info string. Returns `None` otherwise.
///
/// Per CommonMark, the info string of a *backtick* fence may not itself contain
/// a backtick: a line such as `` ```rust``` is inline `` is a paragraph that
/// starts with an inline code span, not a fence opener. Tilde fences carry no
/// such restriction.
fn code_fence_marker(line: &str) -> Option<char> {
    let first = line.chars().next()?;
    if first != '`' && first != '~' {
        return None;
    }
    let info = line.trim_start_matches(first);
    // Both fence chars are single-byte, so the byte difference is the run length.
    let run = line.len() - info.len();
    if run < 3 {
        return None;
    }
    if first == '`' && info.contains('`') {
        // Backticks after the opening run ⇒ inline code, not a fence.
        return None;
    }
    Some(first)
}
382
/// Whether `line` (already trimmed) can close a code block opened with `fence`:
/// it must consist of nothing but the `fence` char, repeated at least three
/// times (a closing fence carries no info string per CommonMark).
fn closes_code_fence(line: &str, fence: char) -> bool {
    let mut fence_len = 0usize;
    for c in line.chars() {
        if c != fence {
            // Anything besides the fence char disqualifies the line.
            return false;
        }
        fence_len += 1;
    }
    fence_len >= 3
}
390
/// Whether `line` (already trimmed) is a setext heading underline: one or more
/// `=` characters and nothing else, or one or more `-` characters and nothing
/// else. (CommonMark permits trailing whitespace, which the caller's `trim` has
/// already removed.)
fn is_setext_underline(line: &str) -> bool {
    match line.chars().next() {
        // The first char picks the marker; every char must then match it.
        Some(mark @ ('=' | '-')) => line.chars().all(|c| c == mark),
        _ => false,
    }
}
398
// Unit tests for the default-summary composer: normalization, template
// rendering, body-paragraph extraction, and wiki-link reduction.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::{Config, Schema};
    use std::fs;
    use tempfile::TempDir;

    // ── Fixtures ─────────────────────────────────────────────────────────────

    /// A temp store with a `DB.md` marker and the given parsed config, built
    /// directly (not via `Store::open`) so these tests exercise the `summary`
    /// code under test, not store-open plumbing.
    ///
    /// The `TempDir` is returned alongside the store so the caller keeps it
    /// bound for the duration of the test.
    fn store_with(config: Config) -> (TempDir, Store) {
        let tmp = TempDir::new().expect("tempdir");
        let root = tmp.path().to_path_buf();
        fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n").expect("write DB.md");
        let store = Store { root, config };
        (tmp, store)
    }

    /// A store whose `## Schemas` declares a `summary_template` for `type_`.
    fn store_with_template(type_: &str, template: &str) -> (TempDir, Store) {
        let mut config = Config::default();
        config.schemas.insert(
            type_.to_string(),
            Schema {
                summary_template: Some(template.to_string()),
                ..Schema::default()
            },
        );
        store_with(config)
    }

    /// Build a [`Frontmatter`] from a YAML map literal so tests state intent in
    /// YAML, not by hand-poking `extra`. This goes through `serde_norway` exactly
    /// like a real file's frontmatter would.
    fn fm(yaml: &str) -> Frontmatter {
        let value: Value = serde_norway::from_str(yaml).expect("test yaml parses");
        let mapping = value.as_mapping().expect("test yaml is a mapping").clone();
        let mut f = Frontmatter::default();
        for (k, v) in mapping {
            let key = k.as_str().expect("string key").to_string();
            match key.as_str() {
                "type" => f.type_ = v.as_str().map(str::to_string),
                "summary" => f.summary = v.as_str().map(str::to_string),
                "id" => f.id = v.as_str().map(str::to_string),
                "status" => f.status = v.as_str().map(str::to_string),
                // Route the typed universal fields to their struct slots (NOT
                // `extra`) so tests exercise the real `field_value` arms for
                // `{tags}` / `{created}` / `{updated}` instead of masking them.
                "tags" => {
                    if let Value::Sequence(items) = &v {
                        f.tags = items
                            .iter()
                            .filter_map(|i| i.as_str().map(str::to_string))
                            .collect();
                    }
                }
                "created" => {
                    f.created = v
                        .as_str()
                        .and_then(|s| chrono::DateTime::parse_from_rfc3339(s).ok())
                }
                "updated" => {
                    f.updated = v
                        .as_str()
                        .and_then(|s| chrono::DateTime::parse_from_rfc3339(s).ok())
                }
                // Everything else is a custom field and lands in `extra`.
                _ => {
                    f.extra.insert(key, v);
                }
            }
        }
        f
    }

    // ── normalize ────────────────────────────────────────────────────────────

    #[test]
    fn normalize_collapses_newlines_and_runs_to_single_spaces() {
        // Newline, double tab and triple space each become one space.
        let got = normalize("first line\nsecond\t\tline   third");
        assert_eq!(got, "first line second line third");
    }

    #[test]
    fn normalize_trims_surrounding_whitespace() {
        assert_eq!(normalize("   padded value \n"), "padded value");
    }

    #[test]
    fn normalize_caps_at_200_chars_on_char_boundary() {
        // 250 multi-byte chars; the cap is by char, not byte.
        let input = "é".repeat(250);
        let got = normalize(&input);
        assert_eq!(got.chars().count(), MAX_SUMMARY_LEN);
        // Truncation must not corrupt UTF-8 (would panic on slice otherwise).
        assert_eq!(got, "é".repeat(MAX_SUMMARY_LEN));
    }

    #[test]
    fn normalize_leaves_short_strings_untouched() {
        assert_eq!(normalize("short"), "short");
    }

    // ── collapse_whitespace (explicit `--summary` path) ──────────────────────

    #[test]
    fn regression_collapse_whitespace_preserves_long_explicit_summary() {
        // Finding #17: an explicit agent `--summary` longer than the 200-char
        // floor must be collapsed to a single line but NOT truncated — the
        // `normalize` floor would have silently dropped the tail. The trailing
        // qualifier (the part a >200-char summary would lose) must survive.
        let long = format!(
            "Director of Operations at Northstar; renewal champion who drove the 175-seat expansion and {}",
            "x".repeat(150)
        );
        assert!(long.chars().count() > MAX_SUMMARY_LEN);
        let collapsed = collapse_whitespace(&long);
        // No truncation: every char is preserved.
        assert_eq!(collapsed.chars().count(), long.chars().count());
        assert_eq!(collapsed, long);
        // Pre-fix `normalize` would have cut this to exactly MAX_SUMMARY_LEN.
        assert!(normalize(&long).chars().count() == MAX_SUMMARY_LEN);
        assert_ne!(collapse_whitespace(&long), normalize(&long));
    }

    #[test]
    fn collapse_whitespace_still_collapses_to_single_line() {
        // The single-line `SUMMARY_MULTILINE` half of the contract still holds —
        // newlines/tabs collapse and the value is trimmed, just never cut.
        assert_eq!(
            collapse_whitespace("  multi\nline\tsummary  "),
            "multi line summary"
        );
    }

    // ── summary_template rendering ───────────────────────────────────────────

    #[test]
    fn template_interpolates_scalar_fields() {
        let (_t, store) =
            store_with_template("contact", "{role} at {company} (last_touch: {last_touch})");
        let f = fm("type: contact\n\
             role: Director of Operations\n\
             company: \"[[records/companies/northstar]]\"\n\
             last_touch: 2026-05-22\n");
        // A wiki-link value reduces to its leaf; the template is the store's, not
        // a built-in — that is the whole point.
        assert_eq!(
            compose_default(&store, "contact", &f, "ignored body").unwrap(),
            "Director of Operations at northstar (last_touch: 2026-05-22)"
        );
    }

    #[test]
    fn template_interpolates_unquoted_scalar_wiki_link_fields() {
        // The unquoted `[[…]]` form parses as a nested sequence, yet must render
        // the same leaf as the quoted string form above.
        let (_t, store) = store_with_template("contact", "{role} at {company}");
        let f = fm("type: contact\n\
             role: Director\n\
             company: [[records/companies/northstar]]\n");
        assert_eq!(
            compose_default(&store, "contact", &f, "").unwrap(),
            "Director at northstar"
        );
    }

    #[test]
    fn template_drops_absent_fields_to_empty() {
        let (_t, store) = store_with_template("contact", "{role} at {company}");
        let f = fm("type: contact\nrole: Advisor\n");
        // `{company}` absent → empty; `normalize` trims the trailing run.
        assert_eq!(
            compose_default(&store, "contact", &f, "").unwrap(),
            "Advisor at"
        );
    }

    #[test]
    fn template_joins_list_fields_comma_separated() {
        // Each quoted wiki-link item reduces to its leaf before joining.
        let (_t, store) = store_with_template("meeting", "{date}: {attendees}");
        let f = fm("type: meeting\n\
             date: 2026-05-10\n\
             attendees:\n\
             \x20 - \"[[records/contacts/alice]]\"\n\
             \x20 - \"[[records/contacts/bob]]\"\n");
        assert_eq!(
            compose_default(&store, "meeting", &f, "").unwrap(),
            "2026-05-10: alice, bob"
        );
    }

    #[test]
    fn template_interpolates_typed_tags_created_updated() {
        // Regression: `field_value` skipped the typed `tags` / `created` /
        // `updated` fields, so these `{…}` placeholders silently rendered empty
        // even when the values were present.
        let (_t, store) = store_with_template("note", "{tags} | {created}");
        let f = fm("type: note\ntags: [urgent, q3]\ncreated: \"2026-05-01T00:00:00Z\"\n");
        assert_eq!(
            compose_default(&store, "note", &f, "").unwrap(),
            // {tags} comma-joins; {created} renders canonical RFC3339 (offset form).
            "urgent, q3 | 2026-05-01T00:00:00+00:00"
        );
    }

    #[test]
    fn template_joins_unquoted_block_wiki_link_list_fields() {
        // Unquoted `[[…]]` list items are nested sequences; each must still
        // reduce to its leaf.
        let (_t, store) = store_with_template("meeting", "{attendees}");
        let f = fm("type: meeting\n\
             attendees:\n\
             \x20 - [[records/contacts/alice]]\n\
             \x20 - [[records/contacts/bob]]\n");
        assert_eq!(
            compose_default(&store, "meeting", &f, "").unwrap(),
            "alice, bob"
        );
    }

    #[test]
    fn template_emits_stray_brace_verbatim() {
        // The first `{` is followed by another `{` before any `}`, so it is
        // literal text; `{title}` is still interpolated.
        let (_t, store) = store_with_template("note", "literal { brace {title}");
        let f = fm("type: note\ntitle: Hello\n");
        assert_eq!(
            compose_default(&store, "note", &f, "").unwrap(),
            "literal { brace Hello"
        );
    }

    #[test]
    fn template_is_deterministic_across_calls() {
        // Same inputs twice → same string.
        let (_t, store) = store_with_template("contact", "{role} ({last_touch})");
        let f = fm("type: contact\nrole: Ops Lead\nlast_touch: 2026-05-22\n");
        let a = compose_default(&store, "contact", &f, "body").unwrap();
        let b = compose_default(&store, "contact", &f, "body").unwrap();
        assert_eq!(a, b);
        assert_eq!(a, "Ops Lead (2026-05-22)");
    }

    #[test]
    fn no_schema_for_type_falls_back_to_body() {
        // Only `contact` has a template; `note` falls back to the body paragraph,
        // proving no type carries a built-in template.
        let (_t, store) = store_with_template("contact", "{role}");
        let f = fm("type: note\n");
        assert_eq!(
            compose_default(&store, "note", &f, "Body sentence here.").unwrap(),
            "Body sentence here."
        );
    }

    // ── unknown / custom + body extraction ─────────────────────────────────────

    #[test]
    fn unknown_type_uses_first_non_heading_paragraph() {
        // No schemas at all: the heading is skipped and only the first prose
        // paragraph is used.
        let (_t, store) = store_with(Config::default());
        let f = fm("type: proposal\n");
        let body = "# Title\n\nThis proposal covers the Q3 roadmap.\n\nSecond paragraph.\n";
        let got = compose_default(&store, "proposal", &f, body).unwrap();
        assert_eq!(got, "This proposal covers the Q3 roadmap.");
    }

    #[test]
    fn first_paragraph_joins_wrapped_lines_until_blank() {
        let body = "Line one\nline two\n\nlater paragraph";
        assert_eq!(first_paragraph(body).as_deref(), Some("Line one line two"));
    }

    #[test]
    fn first_paragraph_none_for_heading_only_body() {
        assert_eq!(first_paragraph("# Just a heading\n## And another\n"), None);
    }

    #[test]
    fn unknown_type_long_paragraph_is_capped_at_200() {
        let (_t, store) = store_with(Config::default());
        let f = fm("type: note\n");
        let long = "word ".repeat(100); // 500 chars
        let got = compose_default(&store, "note", &f, &long).unwrap();
        assert!(got.chars().count() <= MAX_SUMMARY_LEN);
        assert!(got.chars().count() >= MAX_SUMMARY_LEN - 5); // close to the cap
    }

    // ── wiki-link reduction ────────────────────────────────────────────────────

    #[test]
    fn reduce_wiki_link_takes_leaf_segment() {
        assert_eq!(
            reduce_wiki_link("[[records/companies/northstar]]"),
            "northstar"
        );
    }

    #[test]
    fn reduce_wiki_link_prefers_display() {
        assert_eq!(
            reduce_wiki_link("[[records/companies/x|Northstar Inc]]"),
            "Northstar Inc"
        );
    }

    #[test]
    fn reduce_wiki_link_strips_md_extension() {
        assert_eq!(reduce_wiki_link("[[records/companies/x.md]]"), "x");
    }

    #[test]
    fn reduce_wiki_link_passes_through_plain_text() {
        assert_eq!(reduce_wiki_link("just a vendor name"), "just a vendor name");
    }

    #[test]
    fn regression_reduce_wiki_link_multiple_links_passthrough() {
        // Finding #41: a scalar with more than one wiki-link starts `[[` and ends
        // `]]` but is NOT a single link; reducing it dropped the first link and
        // the connecting text, emitting a fragment of the last path (`globex`).
        // It must now pass through unchanged.
        let s = "[[records/companies/acme]] and [[records/companies/globex]]";
        assert_eq!(reduce_wiki_link(s), s);
        // The single-link and plain-text cases still reduce / pass as before.
        assert_eq!(reduce_wiki_link("[[records/companies/acme]]"), "acme");
        assert_eq!(reduce_wiki_link("Acme and Globex"), "Acme and Globex");
    }

    // ── first_paragraph heading-classification (findings #38, #39, #40) ────────

    #[test]
    fn regression_first_paragraph_skips_setext_heading() {
        // Finding #38: a setext heading (title + `===` underline) is a heading,
        // not prose — both lines must be skipped, yielding the real paragraph.
        let body = "Launch Plan\n===========\n\nThis is the real first paragraph of prose.\n";
        assert_eq!(
            first_paragraph(body).as_deref(),
            Some("This is the real first paragraph of prose.")
        );
        // Dash-underline setext (h2) is skipped the same way.
        let body = "Section\n-------\n\nBody prose follows.\n";
        assert_eq!(
            first_paragraph(body).as_deref(),
            Some("Body prose follows.")
        );
    }

    #[test]
    fn regression_first_paragraph_hash_without_space_is_prose() {
        // Finding #39: `#1 priority…` / `#hashtag…` start with `#` but have no
        // space after the hash run, so per CommonMark they are prose, not ATX
        // headings — they must be summarized, not skipped/refused.
        assert_eq!(
            first_paragraph("#1 priority this week: fix onboarding drop-off.\n").as_deref(),
            Some("#1 priority this week: fix onboarding drop-off.")
        );
        assert_eq!(
            first_paragraph("#hashtag notes about the launch\n").as_deref(),
            Some("#hashtag notes about the launch")
        );
        // With a following paragraph, the REAL first paragraph is summarized
        // (not silently skipped to the second one).
        assert_eq!(
            first_paragraph("#1 priority: X\n\nSecond para.\n").as_deref(),
            Some("#1 priority: X")
        );
        // A genuine ATX heading (hash + space) is still skipped.
        assert_eq!(
            first_paragraph("# Real heading\n\nThe actual prose.\n").as_deref(),
            Some("The actual prose.")
        );
        // A bare `###` (hash run then EOL) is still a heading.
        assert_eq!(
            first_paragraph("###\n\nProse.\n").as_deref(),
            Some("Prose.")
        );
    }

    #[test]
    fn regression_first_paragraph_skips_leading_fenced_code_block() {
        // Finding #40: a body opening with a fenced code block must skip the
        // whole block (fence info-string and any in-fence `#` comment) and take
        // the first real prose paragraph after it.
        let body =
            "```bash\n# install dependencies\nnpm install\n```\n\nReal prose paragraph here.\n";
        assert_eq!(
            first_paragraph(body).as_deref(),
            Some("Real prose paragraph here.")
        );
        // Tilde fences are handled the same way.
        let body = "~~~\ncode line\n~~~\n\nProse after tilde fence.\n";
        assert_eq!(
            first_paragraph(body).as_deref(),
            Some("Prose after tilde fence.")
        );
    }

    #[test]
    fn compose_from_body_handles_hash_prose_setext_and_fence() {
        // End-to-end via `compose_from_body` (the `dbmd write` fallback path): a
        // hash-prose sole paragraph composes a summary rather than yielding empty
        // (which made `dbmd write` refuse the file).
        assert_eq!(
            compose_from_body("#1 priority this week: fix onboarding.\n"),
            "#1 priority this week: fix onboarding."
        );
        assert_eq!(
            compose_from_body("Launch Plan\n===========\n\nThe real prose.\n"),
            "The real prose."
        );
        assert_eq!(
            compose_from_body("```bash\n# step\n```\n\nThe real prose.\n"),
            "The real prose."
        );
    }

    #[test]
    fn is_atx_heading_applies_commonmark_space_rule() {
        assert!(is_atx_heading("# Title"));
        assert!(is_atx_heading("###### Deep"));
        assert!(is_atx_heading("###")); // hashes then EOL
        assert!(!is_atx_heading("#1 priority"));
        assert!(!is_atx_heading("#hashtag"));
        assert!(!is_atx_heading("####### too many")); // 7 hashes
        assert!(!is_atx_heading("plain"));
    }

    #[test]
    fn code_fence_and_setext_helpers() {
        // Direct checks of the small line classifiers used by `first_paragraph`.
        assert_eq!(code_fence_marker("```bash"), Some('`'));
        assert_eq!(code_fence_marker("~~~"), Some('~'));
        assert_eq!(code_fence_marker("``"), None); // only two backticks
        assert_eq!(code_fence_marker("plain"), None);
        assert!(closes_code_fence("```", '`'));
        assert!(!closes_code_fence("```bash", '`')); // info string ⇒ not a close
        assert!(!closes_code_fence("~~~", '`')); // wrong fence char
        assert!(is_setext_underline("==="));
        assert!(is_setext_underline("---"));
        assert!(!is_setext_underline("- item")); // not all dashes
        assert!(!is_setext_underline(""));
    }
}