Skip to main content

dbmd_core/
summary.rs

1//! `summary` — the deterministic default-`summary` composer.
2//!
3//! Used by `dbmd fm init` and `dbmd write` when the agent doesn't supply a
4//! `summary`. [`compose_default`] renders the type's `summary_template` (from
5//! the store's `DB.md ## Schemas`) when one is declared, and otherwise falls
6//! back to the body's first non-heading paragraph. No type carries a built-in
7//! template — the template, like the schema, is the store's to declare.
8//!
9//! Contract: **deterministic** (same `(type, frontmatter, body)` → same
10//! string), **single-line** (newlines collapsed to spaces), and **capped at 200
11//! chars** (the SPEC readability bound). The tool generates a deterministic
12//! floor; the agent provides the ceiling via `dbmd fm set <file> summary='…'`.
13
14use serde_norway::Value;
15
16use crate::parser::Frontmatter;
17use crate::store::Store;
18
19/// The SPEC's `summary` length bound, in characters.
20pub const MAX_SUMMARY_LEN: usize = 200;
21
22/// Compose a deterministic default `summary` for a file from its `type`,
23/// frontmatter, and body. If the store's `## Schemas` declares a
24/// `summary_template` for the type, it is rendered with `{field}` interpolation;
25/// otherwise the default is the body's first non-heading paragraph. The result
26/// is always single-line and ≤ [`MAX_SUMMARY_LEN`] chars.
27///
28/// The tool generates a deterministic floor; the agent provides the ceiling via
29/// `dbmd fm set <file> summary='…'`.
30pub fn compose_default(
31    store: &Store,
32    type_: &str,
33    frontmatter: &Frontmatter,
34    body: &str,
35) -> crate::Result<String> {
36    let composed = match store
37        .config
38        .schemas
39        .get(type_)
40        .and_then(|s| s.summary_template.as_deref())
41    {
42        Some(template) => render_template(template, frontmatter),
43        None => compose_from_body(body),
44    };
45    Ok(normalize(&composed))
46}
47
48/// Render a `summary_template` — substitute each `{field}` with the file's
49/// frontmatter value for `field`. A scalar (incl. a wiki-link, reduced to its
50/// display-or-leaf form) renders inline; a list renders comma-joined; an
51/// absent/empty field renders empty. An unmatched `{` is emitted verbatim
52/// (templates are simple field-interpolation floors, not a templating language).
53fn render_template(template: &str, fm: &Frontmatter) -> String {
54    let mut out = String::with_capacity(template.len());
55    let mut rest = template;
56    while let Some(open) = rest.find('{') {
57        out.push_str(&rest[..open]);
58        let after = &rest[open + 1..];
59        let close = after.find('}');
60        let next_open = after.find('{');
61        match close {
62            // A clean `{field}` — no nested `{` before the closing `}`.
63            Some(c) if next_open.is_none_or(|n| n > c) => {
64                let key = after[..c].trim();
65                if let Some(scalar) = field_text(fm, key) {
66                    out.push_str(&scalar);
67                } else {
68                    let list = list_field_texts(fm, key);
69                    if !list.is_empty() {
70                        out.push_str(&list.join(", "));
71                    }
72                }
73                rest = &after[c + 1..];
74            }
75            // A stray `{` (no `}`, or another `{` first) — emit it verbatim.
76            _ => {
77                out.push('{');
78                rest = after;
79            }
80        }
81    }
82    out.push_str(rest);
83    out
84}
85
86/// The body fallback: the file's first non-heading paragraph, truncated to
87/// [`MAX_SUMMARY_LEN`] chars (the truncation is applied by [`normalize`]).
88pub fn compose_from_body(body: &str) -> String {
89    first_paragraph(body).unwrap_or_default()
90}
91
/// Reduce a candidate summary to the **single-line** half of the contract.
///
/// Every run of whitespace (spaces, tabs, newlines, …) becomes one space, and
/// leading/trailing whitespace is removed. The length is deliberately **not**
/// capped: this is the path an explicit agent-supplied `--summary` takes
/// (`dbmd write` / `dbmd fm init`), so the value satisfies `SUMMARY_MULTILINE`
/// without losing any of the agent's content — in line with `dbmd fm set`,
/// which keeps the value verbatim, and with the SPEC stance that the agent
/// provides the ceiling. An over-long value is reported by the validator as a
/// `SUMMARY_TOO_LONG` *warning* rather than being silently cut.
pub fn collapse_whitespace(candidate: &str) -> String {
    // `split_whitespace` yields only the non-blank words, so placing a single
    // space between consecutive words collapses and trims in one pass.
    let mut single_line = String::with_capacity(candidate.len());
    for word in candidate.split_whitespace() {
        if !single_line.is_empty() {
            single_line.push(' ');
        }
        single_line.push_str(word);
    }
    single_line
}
107
108/// Normalize a candidate summary to the full deterministic-**floor** contract:
109/// collapse whitespace (via [`collapse_whitespace`]) then truncate to
110/// [`MAX_SUMMARY_LEN`] **chars** (never splitting a UTF-8 codepoint). Used by
111/// [`compose_default`] for the tool-generated floor. Explicit agent summaries
112/// go through [`collapse_whitespace`] instead, so they are never silently cut.
113pub fn normalize(candidate: &str) -> String {
114    truncate_chars(&collapse_whitespace(candidate), MAX_SUMMARY_LEN)
115}
116
117// ── Internal helpers ────────────────────────────────────────────────────────
118
/// Keep at most the first `max` Unicode scalar values of `s`.
///
/// Counting is done in chars rather than bytes, so a multi-byte character is
/// either kept whole or dropped whole. A string with `max` chars or fewer is
/// returned in full.
fn truncate_chars(s: &str, max: usize) -> String {
    s.chars().take(max).collect()
}
126
127/// Read a frontmatter field's raw YAML value, checking the universal typed
128/// fields first and then [`Frontmatter::extra`] — mirroring the documented
129/// contract of `Frontmatter::get` but reading the struct directly so this
130/// module never depends on another module's body.
131fn field_value(fm: &Frontmatter, key: &str) -> Option<Value> {
132    match key {
133        "type" => fm.type_.clone().map(Value::String),
134        "id" => fm.id.clone().map(Value::String),
135        "summary" => fm.summary.clone().map(Value::String),
136        "status" => fm.status.clone().map(Value::String),
137        // Typed universal fields a `summary_template` may legitimately
138        // interpolate. `created`/`updated` render as their canonical RFC3339
139        // string; `tags` as a sequence (which `list_field_texts` comma-joins).
140        // Without these arms, `{created}` / `{updated}` / `{tags}` would
141        // silently render empty even when the value is present.
142        "created" => fm.created.map(|t| Value::String(t.to_rfc3339())),
143        "updated" => fm.updated.map(|t| Value::String(t.to_rfc3339())),
144        "tags" => {
145            if fm.tags.is_empty() {
146                None
147            } else {
148                Some(Value::Sequence(
149                    fm.tags.iter().cloned().map(Value::String).collect(),
150                ))
151            }
152        }
153        _ => fm.extra.get(key).cloned(),
154    }
155}
156
157/// Read a single frontmatter field as a rendered plain-text scalar, or `None`
158/// when the field is absent, null, or renders empty. Wiki-link-valued fields
159/// are reduced to their display-or-leaf human form (never the raw `[[...]]`).
160fn field_text(fm: &Frontmatter, key: &str) -> Option<String> {
161    let v = field_value(fm, key)?;
162    let rendered = render_scalar(&v)?;
163    let trimmed = rendered.trim();
164    if trimmed.is_empty() {
165        None
166    } else {
167        Some(trimmed.to_string())
168    }
169}
170
171/// Read a list-valued frontmatter field as rendered plain-text items. A scalar
172/// (non-sequence) value is treated as a single-item list. Wiki-link items are
173/// reduced to their display-or-leaf form. Empty / null items are dropped.
174fn list_field_texts(fm: &Frontmatter, key: &str) -> Vec<String> {
175    let Some(v) = field_value(fm, key) else {
176        return Vec::new();
177    };
178    match v {
179        Value::Sequence(items) => items
180            .iter()
181            .filter_map(|item| {
182                let r = render_scalar(item)?;
183                let t = r.trim();
184                if t.is_empty() {
185                    None
186                } else {
187                    Some(t.to_string())
188                }
189            })
190            .collect(),
191        other => render_scalar(&other)
192            .map(|r| r.trim().to_string())
193            .filter(|t| !t.is_empty())
194            .into_iter()
195            .collect(),
196    }
197}
198
199/// Render a single YAML scalar to plain display text. Strings (including YAML
200/// date scalars, which deserialize as strings) are returned as-is but with any
201/// wiki-link reduced to display-or-leaf; numbers and bools stringify
202/// canonically; null / mapping / nested-sequence yield `None`.
203fn render_scalar(v: &Value) -> Option<String> {
204    match v {
205        Value::String(s) => Some(reduce_wiki_link(s)),
206        Value::Sequence(_) => render_unquoted_wiki_link(v),
207        Value::Bool(b) => Some(b.to_string()),
208        Value::Number(n) => {
209            // Render integers without a trailing `.0`; keep the natural form
210            // otherwise. `Number`'s Display already does this.
211            Some(n.to_string())
212        }
213        Value::Null | Value::Mapping(_) | Value::Tagged(_) => None,
214    }
215}
216
217/// YAML parses an unquoted wiki-link scalar (`company: [[records/x]]`) as a
218/// nested sequence, not a string. Recognize that shape so summary templates
219/// render it exactly like the quoted scalar form.
220fn render_unquoted_wiki_link(v: &Value) -> Option<String> {
221    let Value::Sequence(outer) = v else {
222        return None;
223    };
224    if outer.len() != 1 {
225        return None;
226    }
227    let Value::Sequence(inner) = &outer[0] else {
228        return None;
229    };
230    let [Value::String(target)] = inner.as_slice() else {
231        return None;
232    };
233    Some(reduce_wiki_link(&format!("[[{target}]]")))
234}
235
/// Reduce a wiki-link to its human-readable form.
///
/// When `s` (ignoring surrounding whitespace) is exactly one wiki-link —
/// `[[target]]` or `[[target|display]]` — the result is the `display` override
/// if it is non-blank, otherwise the last `/`-separated segment of `target`
/// with a trailing `.md` removed.
///
/// Anything else is returned unchanged. That includes text which merely starts
/// with one link and ends with another (`[[a]] and [[b]]`): it is prose, not a
/// single link, so it is not collapsed to the leaf of the last link.
fn reduce_wiki_link(s: &str) -> String {
    let Some(inner) = s
        .trim()
        .strip_prefix("[[")
        .and_then(|rest| rest.strip_suffix("]]"))
    else {
        return s.to_string();
    };
    // A further bracket pair inside the span means the outer `[[` and `]]`
    // belong to different links; leave such text alone.
    if inner.contains("[[") || inner.contains("]]") {
        return s.to_string();
    }

    // `target|display` → prefer a non-blank display override.
    let (target, display) = match inner.split_once('|') {
        Some((target, display)) => (target, Some(display)),
        None => (inner, None),
    };
    if let Some(display) = display.map(str::trim).filter(|d| !d.is_empty()) {
        return display.to_string();
    }

    // No usable display text: fall back to the leaf of the target path.
    let target = target.trim();
    let leaf = target.rsplit('/').next().unwrap_or(target).trim();
    leaf.strip_suffix(".md").unwrap_or(leaf).to_string()
}
262
/// Extract the first prose paragraph of a body.
///
/// Lines are trimmed. Blank lines and heading lines (anything starting with
/// `#`) are skipped until the first prose line; from there, consecutive prose
/// lines are gathered until a blank line or a heading ends the paragraph. The
/// gathered lines are joined with single spaces. Returns `None` when the body
/// has no prose line at all.
fn first_paragraph(body: &str) -> Option<String> {
    /// A (trimmed) line that is neither blank nor a heading.
    fn is_prose(line: &&str) -> bool {
        !line.is_empty() && !line.starts_with('#')
    }

    let paragraph: Vec<&str> = body
        .lines()
        .map(str::trim)
        .skip_while(|line| !is_prose(line))
        .take_while(is_prose)
        .collect();

    (!paragraph.is_empty()).then(|| paragraph.join(" "))
}
294
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::{Config, Schema};
    use std::fs;
    use tempfile::TempDir;

    // ── Fixtures ─────────────────────────────────────────────────────────────

    /// A temp store with a `DB.md` marker and the given parsed config, built
    /// directly (not via `Store::open`) so these tests exercise the `summary`
    /// code under test, not store-open plumbing. The `TempDir` is returned so
    /// the directory outlives the test body.
    fn store_with(config: Config) -> (TempDir, Store) {
        let tmp = TempDir::new().expect("tempdir");
        let root = tmp.path().to_path_buf();
        fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n").expect("write DB.md");
        let store = Store { root, config };
        (tmp, store)
    }

    /// A store whose `## Schemas` declares a `summary_template` for `type_`.
    fn store_with_template(type_: &str, template: &str) -> (TempDir, Store) {
        let mut config = Config::default();
        config.schemas.insert(
            type_.to_string(),
            Schema {
                summary_template: Some(template.to_string()),
                ..Schema::default()
            },
        );
        store_with(config)
    }

    /// Build a [`Frontmatter`] from a YAML map literal so tests state intent in
    /// YAML, not by hand-poking `extra`. The literal is parsed with
    /// `serde_norway`, then each key is routed to its struct slot.
    fn fm(yaml: &str) -> Frontmatter {
        // Timestamps are written as RFC3339 strings in the test YAML; anything
        // that is not a parseable string leaves the slot unset.
        let parse_timestamp = |v: &Value| {
            v.as_str()
                .and_then(|s| chrono::DateTime::parse_from_rfc3339(s).ok())
        };

        let value: Value = serde_norway::from_str(yaml).expect("test yaml parses");
        let mapping = value.as_mapping().expect("test yaml is a mapping").clone();
        let mut f = Frontmatter::default();
        for (k, v) in mapping {
            let key = k.as_str().expect("string key").to_string();
            match key.as_str() {
                "type" => f.type_ = v.as_str().map(str::to_string),
                "summary" => f.summary = v.as_str().map(str::to_string),
                "id" => f.id = v.as_str().map(str::to_string),
                "status" => f.status = v.as_str().map(str::to_string),
                // Route the typed universal fields to their struct slots (NOT
                // `extra`) so tests exercise the real `field_value` arms for
                // `{tags}` / `{created}` / `{updated}` instead of masking them.
                "tags" => {
                    if let Value::Sequence(items) = &v {
                        f.tags = items
                            .iter()
                            .filter_map(|i| i.as_str().map(str::to_string))
                            .collect();
                    }
                }
                "created" => f.created = parse_timestamp(&v),
                "updated" => f.updated = parse_timestamp(&v),
                _ => {
                    f.extra.insert(key, v);
                }
            }
        }
        f
    }

    // ── normalize ────────────────────────────────────────────────────────────

    #[test]
    fn normalize_collapses_newlines_and_runs_to_single_spaces() {
        let got = normalize("first line\nsecond\t\tline   third");
        assert_eq!(got, "first line second line third");
    }

    #[test]
    fn normalize_trims_surrounding_whitespace() {
        assert_eq!(normalize("   padded value \n"), "padded value");
    }

    #[test]
    fn normalize_caps_at_200_chars_on_char_boundary() {
        // 250 multi-byte chars; the cap is by char, not byte.
        let input = "é".repeat(250);
        let got = normalize(&input);
        assert_eq!(got.chars().count(), MAX_SUMMARY_LEN);
        // Truncation must not corrupt UTF-8 (would panic on slice otherwise).
        assert_eq!(got, "é".repeat(MAX_SUMMARY_LEN));
    }

    #[test]
    fn normalize_leaves_short_strings_untouched() {
        assert_eq!(normalize("short"), "short");
    }

    // ── collapse_whitespace (explicit `--summary` path) ──────────────────────

    #[test]
    fn regression_collapse_whitespace_preserves_long_explicit_summary() {
        // Finding #17: an explicit agent `--summary` longer than the 200-char
        // floor must be collapsed to a single line but NOT truncated — the
        // `normalize` floor would have silently dropped the tail. The trailing
        // qualifier (the part a >200-char summary would lose) must survive.
        let long = format!(
            "Director of Operations at Northstar; renewal champion who drove the 175-seat expansion and {}",
            "x".repeat(150)
        );
        assert!(long.chars().count() > MAX_SUMMARY_LEN);
        let collapsed = collapse_whitespace(&long);
        // No truncation: every char is preserved.
        assert_eq!(collapsed.chars().count(), long.chars().count());
        assert_eq!(collapsed, long);
        // Pre-fix `normalize` would have cut this to exactly MAX_SUMMARY_LEN.
        // `assert_eq!` so a failure reports the actual length.
        assert_eq!(normalize(&long).chars().count(), MAX_SUMMARY_LEN);
        assert_ne!(collapse_whitespace(&long), normalize(&long));
    }

    #[test]
    fn collapse_whitespace_still_collapses_to_single_line() {
        // The single-line `SUMMARY_MULTILINE` half of the contract still holds —
        // newlines/tabs collapse and the value is trimmed, just never cut.
        assert_eq!(
            collapse_whitespace("  multi\nline\tsummary  "),
            "multi line summary"
        );
    }

    // ── summary_template rendering ───────────────────────────────────────────

    #[test]
    fn template_interpolates_scalar_fields() {
        let (_t, store) =
            store_with_template("contact", "{role} at {company} (last_touch: {last_touch})");
        let f = fm("type: contact\n\
             role: Director of Operations\n\
             company: \"[[records/companies/northstar]]\"\n\
             last_touch: 2026-05-22\n");
        // A wiki-link value reduces to its leaf; the template is the store's, not
        // a built-in — that is the whole point.
        assert_eq!(
            compose_default(&store, "contact", &f, "ignored body").unwrap(),
            "Director of Operations at northstar (last_touch: 2026-05-22)"
        );
    }

    #[test]
    fn template_interpolates_unquoted_scalar_wiki_link_fields() {
        let (_t, store) = store_with_template("contact", "{role} at {company}");
        let f = fm("type: contact\n\
             role: Director\n\
             company: [[records/companies/northstar]]\n");
        assert_eq!(
            compose_default(&store, "contact", &f, "").unwrap(),
            "Director at northstar"
        );
    }

    #[test]
    fn template_drops_absent_fields_to_empty() {
        let (_t, store) = store_with_template("contact", "{role} at {company}");
        let f = fm("type: contact\nrole: Advisor\n");
        // `{company}` absent → empty; `normalize` trims the trailing run.
        assert_eq!(
            compose_default(&store, "contact", &f, "").unwrap(),
            "Advisor at"
        );
    }

    #[test]
    fn template_joins_list_fields_comma_separated() {
        let (_t, store) = store_with_template("meeting", "{date}: {attendees}");
        let f = fm("type: meeting\n\
             date: 2026-05-10\n\
             attendees:\n\
             \x20 - \"[[records/contacts/alice]]\"\n\
             \x20 - \"[[records/contacts/bob]]\"\n");
        assert_eq!(
            compose_default(&store, "meeting", &f, "").unwrap(),
            "2026-05-10: alice, bob"
        );
    }

    #[test]
    fn template_interpolates_typed_tags_created_updated() {
        // Regression: `field_value` skipped the typed `tags` / `created` /
        // `updated` fields, so these `{…}` placeholders silently rendered empty
        // even when the values were present. All three are exercised here.
        let (_t, store) = store_with_template("note", "{tags} | {created} | {updated}");
        let f = fm("type: note\n\
             tags: [urgent, q3]\n\
             created: \"2026-05-01T00:00:00Z\"\n\
             updated: \"2026-05-02T12:30:00Z\"\n");
        assert_eq!(
            compose_default(&store, "note", &f, "").unwrap(),
            // {tags} comma-joins; timestamps render canonical RFC3339 (offset form).
            "urgent, q3 | 2026-05-01T00:00:00+00:00 | 2026-05-02T12:30:00+00:00"
        );
    }

    #[test]
    fn template_joins_unquoted_block_wiki_link_list_fields() {
        let (_t, store) = store_with_template("meeting", "{attendees}");
        let f = fm("type: meeting\n\
             attendees:\n\
             \x20 - [[records/contacts/alice]]\n\
             \x20 - [[records/contacts/bob]]\n");
        assert_eq!(
            compose_default(&store, "meeting", &f, "").unwrap(),
            "alice, bob"
        );
    }

    #[test]
    fn template_emits_stray_brace_verbatim() {
        let (_t, store) = store_with_template("note", "literal { brace {title}");
        let f = fm("type: note\ntitle: Hello\n");
        assert_eq!(
            compose_default(&store, "note", &f, "").unwrap(),
            "literal { brace Hello"
        );
    }

    #[test]
    fn template_is_deterministic_across_calls() {
        let (_t, store) = store_with_template("contact", "{role} ({last_touch})");
        let f = fm("type: contact\nrole: Ops Lead\nlast_touch: 2026-05-22\n");
        let a = compose_default(&store, "contact", &f, "body").unwrap();
        let b = compose_default(&store, "contact", &f, "body").unwrap();
        assert_eq!(a, b);
        assert_eq!(a, "Ops Lead (2026-05-22)");
    }

    #[test]
    fn no_schema_for_type_falls_back_to_body() {
        // Only `contact` has a template; `note` falls back to the body paragraph,
        // proving no type carries a built-in template.
        let (_t, store) = store_with_template("contact", "{role}");
        let f = fm("type: note\n");
        assert_eq!(
            compose_default(&store, "note", &f, "Body sentence here.").unwrap(),
            "Body sentence here."
        );
    }

    // ── unknown / custom + body extraction ─────────────────────────────────────

    #[test]
    fn unknown_type_uses_first_non_heading_paragraph() {
        let (_t, store) = store_with(Config::default());
        let f = fm("type: proposal\n");
        let body = "# Title\n\nThis proposal covers the Q3 roadmap.\n\nSecond paragraph.\n";
        let got = compose_default(&store, "proposal", &f, body).unwrap();
        assert_eq!(got, "This proposal covers the Q3 roadmap.");
    }

    #[test]
    fn first_paragraph_joins_wrapped_lines_until_blank() {
        let body = "Line one\nline two\n\nlater paragraph";
        assert_eq!(first_paragraph(body).as_deref(), Some("Line one line two"));
    }

    #[test]
    fn first_paragraph_none_for_heading_only_body() {
        assert_eq!(first_paragraph("# Just a heading\n## And another\n"), None);
    }

    #[test]
    fn unknown_type_long_paragraph_is_capped_at_200() {
        let (_t, store) = store_with(Config::default());
        let f = fm("type: note\n");
        let long = "word ".repeat(100); // 500 chars
        let got = compose_default(&store, "note", &f, &long).unwrap();
        assert!(got.chars().count() <= MAX_SUMMARY_LEN);
        assert!(got.chars().count() >= MAX_SUMMARY_LEN - 5); // close to the cap
    }

    // ── wiki-link reduction ────────────────────────────────────────────────────

    #[test]
    fn reduce_wiki_link_takes_leaf_segment() {
        assert_eq!(
            reduce_wiki_link("[[records/companies/northstar]]"),
            "northstar"
        );
    }

    #[test]
    fn reduce_wiki_link_prefers_display() {
        assert_eq!(
            reduce_wiki_link("[[records/companies/x|Northstar Inc]]"),
            "Northstar Inc"
        );
    }

    #[test]
    fn reduce_wiki_link_strips_md_extension() {
        assert_eq!(reduce_wiki_link("[[records/companies/x.md]]"), "x");
    }

    #[test]
    fn reduce_wiki_link_passes_through_plain_text() {
        assert_eq!(reduce_wiki_link("just a vendor name"), "just a vendor name");
    }
}