ai-memory 0.7.0

AI-agnostic persistent memory system — MCP server, HTTP API, and CLI for any AI platform
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
// Copyright 2026 AlphaOne LLC
// SPDX-License-Identifier: Apache-2.0

//! MCP `memory_load_family` and `memory_smart_load` handlers and routing helpers.

use crate::embeddings::{Embed, Embedder};
use crate::mcp::param_names;
use crate::mcp::registry::McpTool;
use crate::models::Memory;
use crate::{db, validate};
use schemars::JsonSchema;
use serde::Deserialize;
use serde_json::{Value, json};

// --- D1.3 (#984): per-tool McpTool impls for the 2 v0.7.0-B loaders ---

/// v0.7.0 #972 D1.3 (#984) — request body for `memory_load_family`.
/// Schemars-derived schema replaces the hand-coded entry in
/// [`crate::mcp::registry::tool_definitions`] (D1.6 (#987) collapses
/// the macro).
//
// Maintenance notes (plain `//` comments so they stay out of the derived
// schema):
// - The `///` comments on the fields below are not just documentation:
//   the `JsonSchema` derive publishes them as each property's
//   `description`, and the parity tests in this file compare those
//   descriptions byte-for-byte against the legacy catalog. Rewording a
//   field comment is therefore a wire-visible change.
// - `family` has no `#[serde(default)]`, so it is the only required field;
//   `namespace` and `k` may be omitted.
// - NOTE(review): the handlers in this file read the raw `Value` params
//   instead of deserializing into this struct, which is presumably why
//   `dead_code` is allowed — confirm before removing the attribute.
#[derive(Debug, Clone, Default, Deserialize, JsonSchema)]
#[allow(dead_code)]
pub struct LoadFamilyRequest {
    /// MCP tool family (8 groups) — NOT the memory_kind taxonomy. See #864.
    pub family: String,

    /// Namespace filter. Default all.
    #[serde(default)]
    pub namespace: Option<String>,

    /// Top-k cap 100.
    #[serde(default)]
    pub k: Option<i64>,
}

/// v0.7.0 #972 D1.3 (#984) — `McpTool` impl for `memory_load_family`.
///
/// Zero-sized marker type: all tool metadata is exposed through the
/// associated functions of the [`McpTool`] impl below.
#[allow(dead_code)]
pub struct LoadFamilyTool;

impl McpTool for LoadFamilyTool {
    /// Wire name of the tool, taken from the registry constant so it
    /// cannot drift from the catalog (`"memory_load_family"`).
    fn name() -> &'static str {
        crate::mcp::registry::tool_names::MEMORY_LOAD_FAMILY
    }
    /// One-line summary of the tool.
    fn description() -> &'static str {
        "Load top-k recent + high-priority memories from a Family."
    }
    /// Long-form documentation string.
    ///
    /// The `memory_kind taxonomy (...)` parenthetical is parsed by the
    /// `issue_1589_tests` module in this file: every `/`-separated example
    /// other than `etc` must parse as a real `MemoryKind`, so keep the
    /// examples valid when editing this text.
    fn docs() -> &'static str {
        "B1: top-k by metadata.family. Always-on; alternative to memory_recall when family is known. \
         Issue #864 — `family` here is the MCP tool family (8 groups: \
         core/lifecycle/graph/governance/power/meta/archive/other), \
         NOT the memory_kind taxonomy (Observation/Reflection/Decision/Event/etc)."
    }
    /// JSON schema for the tool input, derived from [`LoadFamilyRequest`].
    fn input_schema() -> Value {
        crate::mcp::registry::input_schema_for::<LoadFamilyRequest>()
    }
    /// Tool family this loader itself belongs to (`"core"`).
    fn family() -> &'static str {
        crate::profile::Family::Core.name()
    }
}

/// v0.7.0 #972 D1.3 (#984) — request body for `memory_smart_load`.
//
// Maintenance notes (plain `//` comments so they stay out of the derived
// schema):
// - The `///` field comments below are published by the `JsonSchema`
//   derive as property descriptions and are compared byte-for-byte with
//   the legacy catalog by the parity tests in this file; treat them as
//   wire-visible text.
// - `intent` has no `#[serde(default)]`, so it is the only required field.
// - NOTE(review): the handlers in this file read the raw `Value` params
//   instead of deserializing into this struct, which is presumably why
//   `dead_code` is allowed — confirm before removing the attribute.
#[derive(Debug, Clone, Default, Deserialize, JsonSchema)]
#[allow(dead_code)]
pub struct SmartLoadRequest {
    /// Free-text goal.
    pub intent: String,

    /// Namespace filter. Default all.
    #[serde(default)]
    pub namespace: Option<String>,

    /// Top-k cap 100.
    #[serde(default)]
    pub k: Option<i64>,
}

/// v0.7.0 #972 D1.3 (#984) — `McpTool` impl for `memory_smart_load`.
///
/// Zero-sized marker type: all tool metadata is exposed through the
/// associated functions of the [`McpTool`] impl below.
#[allow(dead_code)]
pub struct SmartLoadTool;

impl McpTool for SmartLoadTool {
    /// Wire name of the tool, taken from the registry constant so it
    /// cannot drift from the catalog (`"memory_smart_load"`).
    fn name() -> &'static str {
        crate::mcp::registry::tool_names::MEMORY_SMART_LOAD
    }
    /// One-line summary of the tool.
    fn description() -> &'static str {
        "Intent-routed loader: free-text intent picks the best Family."
    }
    /// Long-form documentation string.
    ///
    /// The `memory_kind taxonomy (...)` parenthetical is parsed by the
    /// `issue_1589_tests` module in this file: every `/`-separated example
    /// other than `etc` must parse as a real `MemoryKind`, so keep the
    /// examples valid when editing this text.
    fn docs() -> &'static str {
        "B2: pick best Family from free-text intent, then forward to memory_load_family. \
         Issue #864 — `Family` here is the MCP tool family (8 groups: \
         core/lifecycle/graph/governance/power/meta/archive/other), \
         NOT the memory_kind taxonomy (Observation/Reflection/Decision/Event/etc)."
    }
    /// JSON schema for the tool input, derived from [`SmartLoadRequest`].
    fn input_schema() -> Value {
        crate::mcp::registry::input_schema_for::<SmartLoadRequest>()
    }
    /// Tool family this loader itself belongs to (`"core"`).
    fn family() -> &'static str {
        crate::profile::Family::Core.name()
    }
}

#[cfg(test)]
mod d1_3_984_tests {
    //! D1.3 (#984) — schema parity checks for the two v0.7.0-B loader
    //! tools (`memory_load_family`, `memory_smart_load`): the
    //! schemars-derived request schemas must expose the same properties,
    //! with the same descriptions, as the legacy hand-written catalog.
    //! Reuses the allowed-diffs catalog documented in d1_2_983_tests.
    use super::*;
    use std::collections::BTreeSet;

    /// Property-name → property-schema map of a tool's input schema.
    type PropMap = serde_json::Map<String, Value>;

    /// Fetch `inputSchema.properties` for `tool_name` from the legacy
    /// catalog, panicking with a descriptive message when any level is
    /// missing.
    fn legacy_props(tool_name: &str) -> PropMap {
        let catalog = crate::mcp::registry::tool_definitions();
        let entry = catalog
            .get("tools")
            .and_then(Value::as_array)
            .expect("tool_definitions emits `tools` array")
            .iter()
            .find(|tool| tool.get("name").and_then(Value::as_str) == Some(tool_name))
            .unwrap_or_else(|| panic!("{tool_name} must be in legacy catalog"));
        match entry
            .pointer("/inputSchema/properties")
            .and_then(Value::as_object)
        {
            Some(props) => props.clone(),
            None => panic!("{tool_name}.inputSchema.properties must be object"),
        }
    }

    /// Extract the `properties` object from the schemars schema of `T`.
    /// Looks at the schema root first, then under
    /// `/definitions/<short type name>/properties`.
    fn derived_props_for<T: schemars::JsonSchema>() -> PropMap {
        let root = serde_json::to_value(schemars::schema_for!(T)).expect("schema → value");
        if let Some(props) = root.get("properties").and_then(Value::as_object) {
            return props.clone();
        }
        let short_name = std::any::type_name::<T>()
            .rsplit("::")
            .next()
            .unwrap_or("");
        let nested_path = format!("/definitions/{short_name}/properties");
        root.pointer(&nested_path)
            .and_then(Value::as_object)
            .cloned()
            .expect("schemars schema must have properties at a known path")
    }

    /// Assert that legacy and derived schemas expose exactly the same set
    /// of property names.
    fn assert_property_set_parity(tool_name: &str, derived: &PropMap) {
        let legacy = legacy_props(tool_name);
        let legacy_keys: BTreeSet<&str> = legacy.keys().map(String::as_str).collect();
        let derived_keys: BTreeSet<&str> = derived.keys().map(String::as_str).collect();
        let drift: Vec<_> = legacy_keys.symmetric_difference(&derived_keys).collect();
        assert_eq!(
            legacy_keys, derived_keys,
            "{tool_name}: property set drift; diff = {drift:?}"
        );
    }

    /// Assert that every legacy property carrying a `description` has the
    /// identical description in the derived schema. Legacy properties
    /// without a description are not checked.
    fn assert_descriptions_match(tool_name: &str, derived: &PropMap) {
        let legacy = legacy_props(tool_name);
        for (prop_name, legacy_prop) in &legacy {
            let Some(want) = legacy_prop.get("description").and_then(Value::as_str) else {
                continue;
            };
            let got = derived
                .get(prop_name)
                .and_then(|prop| prop.get("description"))
                .and_then(Value::as_str);
            assert_eq!(
                got,
                Some(want),
                "{tool_name}.{prop_name}: description must match legacy byte-for-byte"
            );
        }
    }

    #[test]
    fn load_family_parity_984() {
        let props = derived_props_for::<LoadFamilyRequest>();
        assert_property_set_parity("memory_load_family", &props);
        assert_descriptions_match("memory_load_family", &props);
    }

    #[test]
    fn smart_load_parity_984() {
        let props = derived_props_for::<SmartLoadRequest>();
        assert_property_set_parity("memory_smart_load", &props);
        assert_descriptions_match("memory_smart_load", &props);
    }

    #[test]
    fn load_family_tool_metadata_984() {
        assert_eq!(LoadFamilyTool::name(), "memory_load_family");
        assert_eq!(LoadFamilyTool::family(), "core");
        assert_eq!(SmartLoadTool::name(), "memory_smart_load");
        assert_eq!(SmartLoadTool::family(), "core");
    }
}

#[cfg(test)]
mod issue_1589_tests {
    //! #1589 — both loader docstrings disambiguate the tool family from
    //! the memory_kind taxonomy by citing kind examples
    //! ("NOT the memory_kind taxonomy (Observation/Reflection/...)").
    //! The pre-fix text cited "Plan", which is not part of the 10-kind
    //! Form-6 vocabulary (docs/memory-kind-vocab.md). These tests pin
    //! that every example inside the parenthetical parses as a real
    //! `MemoryKind`, so a phantom kind cannot creep back in.
    use super::*;
    use crate::models::MemoryKind;

    /// Text that opens the taxonomy-example parenthetical in both loader
    /// docstrings.
    const TAXONOMY_PARENTHETICAL_OPEN: &str = "memory_kind taxonomy (";

    /// Pull the `/`-separated examples out of the first taxonomy
    /// parenthetical in `docs` and assert that each one (ignoring the
    /// `etc` filler) is a valid `MemoryKind`, compared case-insensitively.
    fn assert_taxonomy_examples_valid(docs: &str) {
        let (_, after_open) = docs
            .split_once(TAXONOMY_PARENTHETICAL_OPEN)
            .expect("loader docs must carry the memory_kind taxonomy disambiguation");
        let (inner, _) = after_open
            .split_once(')')
            .expect("parenthetical must close");

        let mut examples: Vec<&str> = Vec::new();
        for piece in inner.split('/') {
            let piece = piece.trim();
            if piece.is_empty() || piece == "etc" {
                continue;
            }
            examples.push(piece);
        }
        assert!(
            !examples.is_empty(),
            "taxonomy parenthetical must cite at least one kind example"
        );

        for ex in examples {
            let lowered = ex.to_ascii_lowercase();
            assert!(
                MemoryKind::from_str(&lowered).is_some(),
                "docstring cites {ex:?}, which is not a valid MemoryKind; \
                 valid kinds = {:?}",
                MemoryKind::all()
                    .iter()
                    .map(MemoryKind::as_str)
                    .collect::<Vec<_>>()
            );
        }
    }

    #[test]
    fn load_family_docs_taxonomy_examples_are_valid_kinds_1589() {
        assert_taxonomy_examples_valid(LoadFamilyTool::docs());
    }

    #[test]
    fn smart_load_docs_taxonomy_examples_are_valid_kinds_1589() {
        assert_taxonomy_examples_valid(SmartLoadTool::docs());
    }
}
/// v0.7 B1 — `memory_load_family(family, namespace?, k?)`.
///
/// Always-on alternative to `memory_recall` for the case where the agent
/// already knows which `Family` taxonomy bucket it wants. Returns the
/// top-k recent + high-priority memories whose `metadata.family` matches
/// the requested enum, ordered by `priority DESC, updated_at DESC`,
/// optionally restricted to a single namespace. Rows whose `expires_at`
/// is set and not later than "now" are excluded.
///
/// Conventions:
///
/// - `family` is required. Validated against the eight-family enum
///   (core/lifecycle/graph/governance/power/meta/archive/other) — anything
///   else returns the same `ProfileParseError::UnknownFamily` diagnostic
///   the rest of the codebase uses, so the error message lists the valid
///   options.
/// - `namespace` is optional. When omitted (or not a JSON string) the
///   query spans every namespace; this matches `memory_list`'s
///   "no namespace = all" convention. A supplied string is validated
///   with `validate::validate_namespace`.
/// - `k` defaults to 20 (mirroring `memory_list`'s default `limit`) and
///   is capped at 100 to bound the response payload. Integer values
///   outside `[1, 100]` — including negative ones — are clamped silently
///   rather than rejected; the cap is for response budget, not for
///   correctness. A missing or non-integer `k` (null, float, string)
///   selects the default.
///
/// Filter shape: `json_extract(memories.metadata, '$.family') = ?` —
/// no schema change is needed because v0.7 B1 stores the family tag in
/// the existing free-form `metadata` JSON column. Memories that don't
/// carry a `metadata.family` are invisible to this tool by design (the
/// caller would use `memory_list` or `memory_recall` for the unfiltered
/// case).
///
/// Visibility: when `caller` is `Some`, rows are post-filtered through
/// `crate::visibility::is_visible_to_caller`; `None` returns every row.
///
/// # Errors
///
/// Returns `Err(String)` when `family` is missing, not a string, or not
/// a known family; when `namespace` fails validation; or when preparing,
/// running, or collecting the SQL query fails.
///
/// Response shape:
/// ```json
/// {
///   "family": "core",
///   "namespace": "projects/alpha",   // or null when omitted
///   "k": 20,
///   "count": 3,
///   "memories": [<MemoryRow>, ...]
/// }
/// ```
pub fn handle_load_family(
    conn: &rusqlite::Connection,
    params: &Value,
    caller: Option<&str>,
) -> Result<Value, String> {
    use crate::profile::Family;
    use std::str::FromStr;

    // Response-budget bounds for `k`.
    const DEFAULT_K: usize = 20;
    const MIN_K: usize = 1;
    const MAX_K: usize = 100;

    let family_raw = params["family"].as_str().ok_or("family is required")?;
    // Reuse the canonical enum parser so the diagnostic on a bad
    // `family` value lists the valid options verbatim. Lowercase only,
    // matching the rest of the family vocabulary.
    let family = Family::from_str(family_raw).map_err(|e| e.to_string())?;
    let family_name = family.name();

    let namespace = params.get(param_names::NAMESPACE).and_then(Value::as_str);
    if let Some(ns) = namespace {
        validate::validate_namespace(ns).map_err(|e| e.to_string())?;
    }

    // Default 20, cap at 100 (per spec). Anything below 1 collapses to 1
    // — calling `memory_load_family(k=0)` is almost always a bug, and
    // the always-return-at-least-one shape lines up with R1's recall
    // budget guarantee.
    //
    // The schema types `k` as a signed integer, so negative values are
    // legal input. `as_u64` alone rejects them, which used to make
    // `k = -1` fall through to the default instead of being clamped;
    // the `as_i64` arm restores the documented clamp-to-1 behaviour.
    let k_value = params.get(param_names::K);
    let k = if let Some(requested) = k_value.and_then(Value::as_u64) {
        // Saturate on targets where `usize` is narrower than `u64`.
        usize::try_from(requested)
            .unwrap_or(usize::MAX)
            .clamp(MIN_K, MAX_K)
    } else if k_value.and_then(Value::as_i64).is_some() {
        // An integer that is not a `u64` can only be negative.
        MIN_K
    } else {
        DEFAULT_K
    };

    let now = chrono::Utc::now().to_rfc3339();
    let mut stmt = conn
        .prepare(
            "SELECT id, tier, namespace, title, content, tags, priority, confidence, source, \
                    access_count, created_at, updated_at, last_accessed_at, expires_at, metadata \
             FROM memories \
             WHERE (?1 IS NULL OR namespace = ?1) \
               AND json_extract(metadata, '$.family') = ?2 \
               AND (expires_at IS NULL OR expires_at > ?3) \
             ORDER BY priority DESC, updated_at DESC \
             LIMIT ?4",
        )
        .map_err(|e| format!("prepare memory_load_family failed: {e}"))?;

    let rows = stmt
        .query_map(
            rusqlite::params![namespace, family_name, now, k],
            db::row_to_memory,
        )
        .map_err(|e| format!("query memory_load_family failed: {e}"))?;
    let memories: Vec<Memory> = rows
        .collect::<rusqlite::Result<Vec<_>>>()
        .map_err(|e| format!("collect memory_load_family rows failed: {e}"))?;

    // #1555 — scope=private visibility post-filter, parity with the sibling
    // read tools (list/search/recall) and applied through the canonical
    // `is_visible_to_caller` predicate so the inbox / target_agent_id carve-out
    // is honored (a naive `metadata.scope != 'private'` SQL clause would miss
    // it). `caller == None` is the single-tenant trust-all posture (unchanged).
    // `count` is recomputed from the filtered set so the wire count stays honest.
    //
    // NOTE(review): the filter runs after `LIMIT`, so a caller can receive
    // fewer than `k` rows even when more visible rows exist further down
    // the ordering.
    let memories: Vec<Memory> = match caller {
        Some(c) => memories
            .into_iter()
            .filter(|m| crate::visibility::is_visible_to_caller(m, c))
            .collect(),
        None => memories,
    };

    Ok(json!({
        "family": family_name,
        "namespace": namespace,
        "k": k,
        "count": memories.len(),
        "memories": memories,
    }))
}

/// v0.7 B2 — `memory_smart_load(intent, namespace?, k?)`.
///
/// Always-on intent-routed loader. The caller describes what it is about
/// to do in free text (e.g. "I'm about to debug a flaky test"); this
/// handler chooses a `Family` and delegates the actual load to
/// [`handle_load_family`] via [`forward_to_load_family`], so the agent
/// never needs to know the family taxonomy.
///
/// Routing, in order:
///
/// 1. **Blank intent.** `intent` must be present as a JSON string
///    (otherwise `Err("intent is required")`). It is trimmed; if nothing
///    is left the request is *not* rejected — it is routed to
///    `Family::Core` with score `0.0` and source `"fallback"`.
/// 2. **Keyword scorer.** [`fallback_via_keywords`] always runs first and
///    yields a pick tagged `"keyword"` (some token overlapped) or
///    `"fallback"` (no overlap anywhere).
/// 3. **Embedder vote with keyword veto.** When an embedder is supplied
///    and [`best_family_via_embedder`] returns a pick:
///    - if the keyword pick is tagged `"keyword"` and names a different
///      family, the keyword pick wins unchanged;
///    - otherwise the embedder's family and score are used with source
///      `"embedder"`.
/// 4. **No embedder / embedder produced nothing.** The keyword pick is
///    used as-is.
///
/// Response shape:
/// ```json
/// {
///   "chosen_family": "graph",
///   "score": 0.62,
///   "chosen_family_source": "embedder" | "keyword" | "fallback",
///   "intent": "<trimmed input>",
///   "namespace": "projects/alpha", // or null
///   "k": 20,
///   "count": 3,
///   "memories": [<MemoryRow>, ...]
/// }
/// ```
///
/// `namespace` and `k` are interpreted by `memory_load_family`; any error
/// it returns is propagated unchanged.
pub fn handle_smart_load(
    conn: &rusqlite::Connection,
    params: &Value,
    embedder: Option<&dyn Embed>,
    caller: Option<&str>,
) -> Result<Value, String> {
    let intent = params["intent"]
        .as_str()
        .ok_or("intent is required")?
        .trim();

    // A blank intent is the canonical "no signal" case: load the Core
    // family and flag the answer as a fallback so the caller can tell.
    if intent.is_empty() {
        return forward_to_load_family(
            conn,
            crate::profile::Family::Core,
            0.0,
            "fallback",
            intent,
            params,
            caller,
        );
    }

    // Round-4 keyword-veto strategy. The deterministic keyword scorer is
    // hand-tuned (F14) and is treated as ground truth whenever it has a
    // signal; the embedder's cosine similarity over ~80-word descriptors
    // proved noisy for short imperative intents ("store a new memory",
    // "verify a memory's signature") — Round-3 measured 8/10 routing
    // accuracy, Round-4 measured 4–5/10 with the embedder mis-routing
    // common verbs to `archive`. The embedder therefore only decides the
    // family when the keyword scorer found no overlap at all; when both
    // agree, the embedder's score and source label are reported.
    let keyword_pick = fallback_via_keywords(intent);
    let embedder_pick = embedder.and_then(|emb| best_family_via_embedder(emb, intent));

    let (family, score, source) = match embedder_pick {
        Some((emb_family, emb_score)) => {
            let keyword_vetoes = keyword_pick.2 == "keyword" && keyword_pick.0 != emb_family;
            if keyword_vetoes {
                keyword_pick
            } else {
                (emb_family, emb_score, "embedder")
            }
        }
        // No embedder configured, or it could not produce a pick.
        None => keyword_pick,
    };

    forward_to_load_family(conn, family, score, source, intent, params, caller)
}

/// Build the `memory_smart_load` response by forwarding to
/// [`handle_load_family`] with the chosen family. The forwarded JSON is
/// flattened into the smart_load response shape so callers see one
/// payload, not a nested `load_family_response` blob.
///
/// - `family`, `score`, `source` — the routing decision; logged via
///   `tracing` and echoed as `chosen_family`, `score` (rounded for
///   display) and `chosen_family_source`.
/// - `intent` — echoed in the response; only its length is logged.
/// - `params` — the original smart_load params; `namespace` and `k` are
///   passed through to `memory_load_family`.
/// - `caller` — forwarded unchanged for the visibility filter.
///
/// `k` is forwarded verbatim (whatever JSON value the caller sent) so
/// that `handle_load_family` is the single place that interprets,
/// defaults and clamps it; pre-filtering it here would hide out-of-range
/// values from the callee.
///
/// # Errors
///
/// Propagates any `Err` returned by [`handle_load_family`].
fn forward_to_load_family(
    conn: &rusqlite::Connection,
    family: crate::profile::Family,
    score: f32,
    source: &str,
    intent: &str,
    params: &Value,
    caller: Option<&str>,
) -> Result<Value, String> {
    let family_name = family.name();
    tracing::info!(
        target: "memory_smart_load",
        chosen_family = family_name,
        score = score,
        source = source,
        intent_len = intent.len(),
        "smart_load routed intent to family"
    );

    // Build the payload memory_load_family expects: family + the
    // forwarded namespace + k from the caller. Optional keys are written
    // under the same `param_names` constants the callee reads them with.
    let mut forward = json!({"family": family_name});
    if let Some(ns) = params.get(param_names::NAMESPACE).and_then(Value::as_str) {
        forward[param_names::NAMESPACE] = json!(ns);
    }
    if let Some(k) = params.get(param_names::K) {
        forward[param_names::K] = k.clone();
    }

    let inner = handle_load_family(conn, &forward, caller)?;

    // Lift the callee's response fields into the flat smart_load shape.
    // The fallbacks only apply if the callee's response ever lacks a key.
    let memories = inner.get("memories").cloned().unwrap_or_else(|| json!([]));
    let count = inner.get("count").cloned().unwrap_or_else(|| json!(0));
    let k = inner.get("k").cloned().unwrap_or_else(|| json!(20));
    let namespace = inner.get("namespace").cloned().unwrap_or(Value::Null);

    // Round score to 3 decimals at the wire — keeps the JSON readable
    // without leaking f32 quantisation noise (same convention as
    // memory_check_duplicate).
    let score_rounded = (f64::from(score) * crate::SCORE_DISPLAY_ROUND_FACTOR).round()
        / crate::SCORE_DISPLAY_ROUND_FACTOR;

    Ok(json!({
        "chosen_family": family_name,
        "score": score_rounded,
        "chosen_family_source": source,
        "intent": intent,
        "namespace": namespace,
        "k": k,
        "count": count,
        "memories": memories,
    }))
}

/// Pick a family by embedding similarity.
///
/// The intent is embedded once with `embed_query`; each family's
/// descriptor (see [`family_descriptor`]) is embedded with `embed` and
/// compared to it using `Embedder::cosine_similarity`. The family with
/// the strictly highest similarity is returned together with that score;
/// on equal scores the family visited first by `Family::all()` is kept.
///
/// Returns `None` when the intent itself cannot be embedded, or when no
/// descriptor could be embedded, so the caller can fall back to the
/// keyword scorer. A descriptor whose embedding fails is skipped rather
/// than aborting the whole pick.
///
/// **B3 forward-compat note:** once B3 lands and `AppState` carries
/// `family_embeddings: Arc<RwLock<Option<Vec<(Family, Vec<f32>)>>>>`
/// plus `best_family_match(intent)`, this helper should become a call
/// into `state.best_family_match(intent)`. Until then every call
/// re-embeds all descriptors inline — accurate, but not the production
/// caching shape.
fn best_family_via_embedder(
    emb: &dyn Embed,
    intent: &str,
) -> Option<(crate::profile::Family, f32)> {
    use crate::profile::Family;

    let query_vec = emb.embed_query(intent).ok()?;

    let mut leader: Option<(Family, f32)> = None;
    for family in Family::all() {
        if let Ok(descriptor_vec) = emb.embed(family_descriptor(*family)) {
            let similarity = Embedder::cosine_similarity(&query_vec, &descriptor_vec);
            // Strict `>` keeps the earliest family on ties.
            let takes_lead = match leader {
                Some((_, top)) => similarity > top,
                None => true,
            };
            if takes_lead {
                leader = Some((*family, similarity));
            }
        }
    }
    leader
}

/// Deterministic keyword-overlap scorer used when no embedder is
/// available (e.g. the `keyword` feature tier), when the embedder
/// returns nothing, or as the veto signal ahead of the embedder.
///
/// Returns `(family, score, source)` where `source` is `"keyword"` when
/// at least one signal matched and `"fallback"` (with `Family::Core`,
/// score `0.0`) when nothing matched or the intent has no tokens.
///
/// Tokenisation: `intent` is split on every character that is not ASCII
/// alphanumeric and lowercased. Because `_` is a split point, a second
/// pass keeps underscore-joined identifiers (`memory_notify`) intact so
/// a tool named verbatim can be recognised.
///
/// Round-2 F14 — per family, three signals are collected:
/// 1. **Descriptor overlap** — intent tokens that exactly equal a token
///    of the family's descriptor (see [`family_descriptor`]).
/// 2. **Tool-segment matches** — for each tool name, split on `_` with
///    the universal `memory` segment dropped (every tool has it, so it
///    would only add noise), the number of intent tokens matching any
///    segment. Matching is exact equality, or a shared 5-character
///    prefix when both tokens are at least 5 characters long, so
///    "notification" matches `notify`. These per-tool counts are both
///    summed and tracked as a maximum. Intent tokens are not
///    de-duplicated: a repeated word counts each time it occurs.
/// 3. **Full-identifier hits** — tools whose complete name appears in
///    the intent.
///
/// Composite score, normalised by the intent token count:
///
/// ```text
/// (2.0 * descriptor_overlap
///  + 1.0 * sum_of_tool_segment_matches
///  + 2.0 * max_tool_segment_matches
///  + 4.0 * full_identifier_hits) / intent_token_count
/// ```
///
/// The score is not bounded by 1.0. The `max` term lets one
/// highly-specific tool (`expand_query` matching both "expand" and
/// "query") outrank a family that only matches through several weak
/// tools (`kg_query` matching just "query").
///
/// Ties are broken in favour of the family visited first by
/// `Family::all()`, so routing is stable.
fn fallback_via_keywords(intent: &str) -> (crate::profile::Family, f32, &'static str) {
    use crate::profile::Family;

    // Shared-prefix length required by the relaxed match. Threshold 5
    // keeps "store"/"stories" and "task"/"taskbar" from cross-matching.
    const PREFIX_LEN: usize = 5;

    // Match relation = exact token equality OR shared PREFIX_LEN prefix
    // when both tokens are at least PREFIX_LEN long. The relaxation lets
    // an intent use a different English surface form than the tool
    // name's stem (notify/notification, subscribe/subscription).
    // Compared bytewise so a non-ASCII tool name can never trigger a
    // char-boundary panic; for ASCII tokens this is identical to
    // comparing the string prefixes.
    fn token_matches(a: &str, b: &str) -> bool {
        if a == b {
            return true;
        }
        let (a, b) = (a.as_bytes(), b.as_bytes());
        a.len() >= PREFIX_LEN && b.len() >= PREFIX_LEN && a[..PREFIX_LEN] == b[..PREFIX_LEN]
    }

    let intent_tokens: Vec<String> = intent
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|s| !s.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();
    if intent_tokens.is_empty() {
        return (Family::Core, 0.0, "fallback");
    }

    // Underscore-preserving tokens, used only for the full-identifier
    // check. `intent_tokens` is split on `_`, so none of its entries can
    // ever equal a tool name such as `memory_notify`; without this
    // second pass the full-identifier bonus could never fire for such
    // names. Words without an underscore are already in `intent_tokens`
    // and are skipped here.
    let intent_idents: Vec<String> = intent
        .split(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
        .filter(|s| s.contains('_'))
        .map(str::to_ascii_lowercase)
        .collect();

    let mut best: Option<(Family, f32)> = None;
    for family in Family::all() {
        let descriptor = family_descriptor(*family).to_ascii_lowercase();
        let desc_tokens: Vec<&str> = descriptor
            .split(|c: char| !c.is_ascii_alphanumeric())
            .filter(|s| !s.is_empty())
            .collect();

        let mut tool_distinct_sum: usize = 0;
        let mut tool_distinct_max: usize = 0;
        let mut full_id_hits: usize = 0;
        for tool_name in family.tool_names() {
            let lower = tool_name.to_ascii_lowercase();
            // Segments from underscore-split. Skip the universal
            // `memory` prefix (it's noise — every tool has it).
            let segments: Vec<&str> = lower
                .split('_')
                .filter(|s| !s.is_empty() && *s != "memory")
                .collect();
            // Intent tokens that match any segment of THIS tool name
            // (exact OR prefix-relaxed match).
            let distinct = intent_tokens
                .iter()
                .filter(|t| segments.iter().any(|seg| token_matches(seg, t.as_str())))
                .count();
            tool_distinct_sum += distinct;
            tool_distinct_max = tool_distinct_max.max(distinct);
            // Full-identifier hit — the caller named the tool verbatim.
            // Strongest signal. Checked against both token sets so tool
            // names with and without underscores are recognised.
            if intent_tokens
                .iter()
                .chain(intent_idents.iter())
                .any(|t| t.as_str() == lower)
            {
                full_id_hits += 1;
            }
        }

        let desc_overlap = intent_tokens
            .iter()
            .filter(|t| desc_tokens.iter().any(|d| *d == t.as_str()))
            .count();

        // A family with no signal at all never becomes a candidate.
        if desc_overlap == 0 && tool_distinct_sum == 0 && full_id_hits == 0 {
            continue;
        }

        // Round-2 F14 — composite score:
        //   2.0 * descriptor overlap (curated intent vocabulary —
        //         each family's descriptor is hand-tuned to capture
        //         the family's purpose, so a hit is high-signal)
        // + 1.0 * sum of per-tool segment matches (broader, more
        //         false-positive prone than the descriptor)
        // + 2.0 * tool_distinct_max (strong extra boost when ONE
        //         tool name matches MULTIPLE intent keywords)
        // + 4.0 * full-identifier hits (caller named the tool
        //         verbatim — overwhelming signal)
        // Normalised by intent token count so single-token intents
        // still produce a sensible score in [0, ~1+].
        #[allow(clippy::cast_precision_loss)]
        let score = (2.0 * desc_overlap as f32
            + tool_distinct_sum as f32
            + 2.0 * tool_distinct_max as f32
            + 4.0 * full_id_hits as f32)
            / (intent_tokens.len() as f32);
        // Strict `>` keeps the earliest family on ties.
        if best.is_none_or(|(_, s)| score > s) {
            best = Some((*family, score));
        }
    }

    best.map_or((Family::Core, 0.0, "fallback"), |(f, s)| (f, s, "keyword"))
}

/// Per-family descriptor used as the embedding/keyword target. Each
/// descriptor is a short, space-separated, lowercase bag of intent-style
/// words that captures what an agent might say when about to act on that
/// family's tools. Source-anchored at the family enum in `src/profile.rs`.
///
/// The strings are routing data, not documentation: the keyword scorer
/// in this file tokenises them on non-alphanumeric characters and the
/// embedder path embeds them verbatim, so adding, removing or moving a
/// word changes `memory_smart_load` routing. Note that some words appear
/// in more than one descriptor (e.g. `purge`, `replay`, `query`,
/// `subscribe`, `subscription`, `inbox`), and that `find_paths`
/// tokenises to `find` + `paths`, so `find` counts for both Core and
/// Graph in the keyword scorer.
///
/// The `match` is exhaustive on purpose: adding a `Family` variant is a
/// compile error here until it gets a descriptor.
///
/// **B3 forward-compat note:** when B3 lands these strings will be
/// embedded once at startup and cached on `AppState::family_embeddings`,
/// rather than re-embedded per smart_load call. The strings themselves
/// stay anchored here so the cache and the keyword fallback share one
/// vocabulary.
fn family_descriptor(family: crate::profile::Family) -> &'static str {
    use crate::profile::Family;
    match family {
        // Storing and retrieving memories; also the no-signal fallback
        // family for smart_load routing.
        Family::Core => {
            "store remember save record memory note write recall fetch get \
             search find list browse read load family core baseline"
        }
        // Mutating or retiring existing memories.
        Family::Lifecycle => {
            "update edit modify change delete remove forget purge garbage \
             collect promote upgrade downgrade migrate refresh rotate"
        }
        // Knowledge-graph navigation; also carries debugging vocabulary
        // ("debug flaky test investigate trace").
        Family::Graph => {
            "graph link relation entity knowledge kg query timeline replay \
             verify path traverse find_paths connect taxonomy alias debug \
             flaky test investigate trace ancestry"
        }
        // Approval flows, policy and namespace standards.
        Family::Governance => {
            "approve reject pending policy permission rule namespace \
             standard subscribe unsubscribe governance review audit"
        }
        // Consolidation, contradiction detection and other augmented
        // operations.
        Family::Power => {
            "consolidate merge contradiction duplicate auto tag expand \
             query inbox subscription replay dlq dead letter retry power \
             llm augment"
        }
        // Introspection and session bootstrap.
        Family::Meta => {
            "capabilities agent register session start stats meta info \
             discovery introspection bootstrap"
        }
        // Backup, restore and retention.
        Family::Archive => {
            "archive backup restore purge old historical retention cold \
             storage"
        }
        // Round-2 F14 — extended with "notification message send
        // dm direct another recipient inbox" so an intent like "I
        // want to send a notification to another agent" routes to
        // `other` (where `memory_notify` lives). The tool-name
        // segment boost in the keyword scorer (`memory_notify` →
        // `notify`) plus the wider vocabulary covers both "notify"
        // and "notification" surface forms.
        Family::Other => {
            "subscription notify subscribe webhook event other miscellaneous \
             notification message send dm direct another recipient inbox"
        }
    }
}