Skip to main content

ai_memory/cli/
consolidate.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! `cmd_consolidate` and `cmd_auto_consolidate` migrations. See
5//! `cli::store` for the design pattern.
6
7use crate::cli::CliOutput;
8use crate::models::field_names;
9use crate::{db, identity, models, validate};
10use anyhow::Result;
11use clap::Args;
12use models::Tier;
13use std::path::Path;
14
15#[derive(Args)]
16pub struct ConsolidateArgs {
17    /// Comma-separated memory IDs
18    pub ids: String,
19    #[arg(long, short = 'T', allow_hyphen_values = true)]
20    pub title: String,
21    #[arg(long, short = 's', allow_hyphen_values = true)]
22    pub summary: String,
23    #[arg(long, short)]
24    pub namespace: Option<String>,
25}
26
27#[derive(Args)]
28pub struct AutoConsolidateArgs {
29    /// Namespace to consolidate
30    #[arg(long, short)]
31    pub namespace: Option<String>,
32    /// Only consolidate short-term memories
33    #[arg(long, default_value_t = false)]
34    pub short_only: bool,
35    /// Minimum number of memories to trigger consolidation
36    #[arg(long, default_value_t = 3)]
37    pub min_count: usize,
38    /// Dry run — show what would be consolidated without doing it
39    #[arg(long, default_value_t = false)]
40    pub dry_run: bool,
41}
42
43/// `consolidate` handler.
44pub fn run(
45    db_path: &Path,
46    args: ConsolidateArgs,
47    json_out: bool,
48    cli_agent_id: Option<&str>,
49    out: &mut CliOutput<'_>,
50) -> Result<()> {
51    let ids: Vec<String> = args
52        .ids
53        .split(',')
54        .map(|s| s.trim().to_string())
55        .filter(|s| !s.is_empty())
56        .collect();
57    // #1590 — explicit --namespace > configured [storage].default_namespace
58    // > git remote > cwd basename > "global" (see `cli::helpers`).
59    let namespace = crate::cli::helpers::resolve_namespace(args.namespace);
60    validate::validate_consolidate(&ids, &args.title, &args.summary, &namespace)?;
61    let conn = db::open(db_path)?;
62    let consolidator_agent_id = identity::resolve_agent_id(cli_agent_id, None)?;
63    let new_id = db::consolidate(
64        &conn,
65        &ids,
66        &args.title,
67        &args.summary,
68        &namespace,
69        &Tier::Long,
70        "cli",
71        &consolidator_agent_id,
72    )?;
73    if json_out {
74        writeln!(
75            out.stdout,
76            "{}",
77            serde_json::json!({"id": new_id, (field_names::CONSOLIDATED): ids.len()})
78        )?;
79    } else {
80        writeln!(
81            out.stdout,
82            "consolidated {} memories into: {}",
83            ids.len(),
84            new_id
85        )?;
86    }
87    Ok(())
88}
89
90/// `auto-consolidate` handler.
91#[allow(clippy::too_many_lines)]
92pub fn run_auto(
93    db_path: &Path,
94    args: &AutoConsolidateArgs,
95    json_out: bool,
96    cli_agent_id: Option<&str>,
97    out: &mut CliOutput<'_>,
98) -> Result<()> {
99    let conn = db::open(db_path)?;
100    let consolidator_agent_id = identity::resolve_agent_id(cli_agent_id, None)?;
101    let tier_filter = if args.short_only {
102        Some(Tier::Short)
103    } else {
104        None
105    };
106    let namespaces = if let Some(ref ns) = args.namespace {
107        vec![models::NamespaceCount {
108            namespace: ns.clone(),
109            count: 0,
110        }]
111    } else {
112        db::list_namespaces(&conn)?
113    };
114
115    let mut total = 0;
116    let mut groups = Vec::new();
117
118    for ns in &namespaces {
119        let memories = db::list(
120            &conn,
121            Some(&ns.namespace),
122            tier_filter.as_ref(),
123            200,
124            0,
125            None,
126            None,
127            None,
128            None,
129            None,
130        )?;
131        if memories.len() < args.min_count {
132            continue;
133        }
134
135        // Group by all tags (each memory appears in every tag group it belongs to)
136        let mut tag_groups: std::collections::HashMap<String, Vec<&models::Memory>> =
137            std::collections::HashMap::new();
138        for mem in &memories {
139            if mem.tags.is_empty() {
140                tag_groups
141                    .entry("_untagged".to_string())
142                    .or_default()
143                    .push(mem);
144            } else {
145                for tag in &mem.tags {
146                    tag_groups.entry(tag.clone()).or_default().push(mem);
147                }
148            }
149        }
150
151        let mut consolidated_ids: std::collections::HashSet<String> =
152            std::collections::HashSet::new();
153        for (tag, group) in &tag_groups {
154            // Skip memories already consolidated in another tag group
155            let group: Vec<&&models::Memory> = group
156                .iter()
157                .filter(|m| !consolidated_ids.contains(&m.id))
158                .collect();
159            if group.len() < args.min_count {
160                continue;
161            }
162            let ids: Vec<String> = group.iter().map(|m| m.id.clone()).collect();
163            if args.dry_run {
164                let titles: Vec<&str> = group.iter().map(|m| m.title.as_str()).collect();
165                groups.push(serde_json::json!({"namespace": ns.namespace, "tag": tag, "count": group.len(), "titles": titles}));
166            } else {
167                let title = format!(
168                    "Consolidated: {} ({} memories)",
169                    if tag == "_untagged" {
170                        &ns.namespace
171                    } else {
172                        tag
173                    },
174                    group.len()
175                );
176                let content: String = group
177                    .iter()
178                    .map(|m| format!("- {}: {}", m.title, &m.content[..m.content.len().min(200)]))
179                    .collect::<Vec<_>>()
180                    .join("\n");
181                db::consolidate(
182                    &conn,
183                    &ids,
184                    &title,
185                    &content,
186                    &ns.namespace,
187                    &Tier::Long,
188                    "auto-consolidate",
189                    &consolidator_agent_id,
190                )?;
191                consolidated_ids.extend(ids);
192                total += group.len();
193            }
194        }
195    }
196
197    if json_out {
198        if args.dry_run {
199            writeln!(
200                out.stdout,
201                "{}",
202                serde_json::json!({"dry_run": true, "groups": groups})
203            )?;
204        } else {
205            writeln!(
206                out.stdout,
207                "{}",
208                serde_json::json!({(field_names::CONSOLIDATED): total})
209            )?;
210        }
211    } else if args.dry_run {
212        writeln!(out.stdout, "dry run — would consolidate:")?;
213        for g in &groups {
214            writeln!(
215                out.stdout,
216                "  {} [{}]: {} memories",
217                g["namespace"], g["tag"], g["count"]
218            )?;
219        }
220    } else {
221        writeln!(out.stdout, "auto-consolidated {total} memories")?;
222    }
223    Ok(())
224}
225
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::test_utils::{TestEnv, seed_memory};

    /// Baseline `consolidate` arguments targeting `test-ns`; each test fills
    /// in `ids` (and overrides other fields where needed).
    fn ns_args() -> ConsolidateArgs {
        ConsolidateArgs {
            ids: String::new(),
            title: "consolidated title".to_string(),
            summary: "merged summary".to_string(),
            namespace: Some("test-ns".to_string()),
        }
    }

    /// Baseline `auto-consolidate` arguments for `ns`: all tiers,
    /// `min_count = 3`, real (non-dry) run. Tests override individual fields
    /// with struct-update syntax.
    fn auto_args(ns: &str) -> AutoConsolidateArgs {
        AutoConsolidateArgs {
            namespace: Some(ns.to_string()),
            short_only: false,
            min_count: 3,
            dry_run: false,
        }
    }

    #[test]
    fn test_consolidate_happy_path() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        let id1 = seed_memory(&db, "test-ns", "first", "alpha");
        let id2 = seed_memory(&db, "test-ns", "second", "beta");
        let mut args = ns_args();
        args.ids = format!("{id1},{id2}");
        {
            let mut out = env.output();
            run(&db, args, false, Some("test-agent"), &mut out).unwrap();
        }
        assert!(env.stdout_str().contains("consolidated 2 memories into:"));
    }

    #[test]
    fn test_consolidate_json_output() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        let id1 = seed_memory(&db, "test-ns", "a1", "data1");
        let id2 = seed_memory(&db, "test-ns", "a2", "data2");
        let mut args = ns_args();
        args.ids = format!("{id1},{id2}");
        {
            let mut out = env.output();
            run(&db, args, true, Some("test-agent"), &mut out).unwrap();
        }
        let v: serde_json::Value = serde_json::from_str(env.stdout_str().trim()).unwrap();
        assert!(v["id"].is_string());
        assert_eq!(v["consolidated"].as_u64().unwrap(), 2);
    }

    #[test]
    fn test_consolidate_single_id_validation_error() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        let id1 = seed_memory(&db, "test-ns", "lone", "only-one");
        let mut args = ns_args();
        args.ids = id1;
        let mut out = env.output();
        let res = run(&db, args, false, Some("test-agent"), &mut out);
        assert!(res.is_err(), "single id should fail validation");
    }

    #[test]
    fn test_consolidate_invalid_namespace() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        let id1 = seed_memory(&db, "test-ns", "x", "y");
        let id2 = seed_memory(&db, "test-ns", "x2", "y2");
        let mut args = ns_args();
        args.ids = format!("{id1},{id2}");
        // Reserved/empty namespace; validate_namespace rejects empty.
        args.namespace = Some(String::new());
        let mut out = env.output();
        let res = run(&db, args, false, Some("test-agent"), &mut out);
        assert!(res.is_err());
    }

    #[test]
    fn test_auto_consolidate_dry_run_lists_groups() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        // Seed several memories in the same ns so the threshold trips.
        for i in 0..4 {
            seed_memory(&db, "auto-ns", &format!("title-{i}"), &format!("body-{i}"));
        }
        let args = AutoConsolidateArgs {
            dry_run: true,
            ..auto_args("auto-ns")
        };
        {
            let mut out = env.output();
            run_auto(&db, &args, false, Some("test-agent"), &mut out).unwrap();
        }
        assert!(env.stdout_str().contains("dry run"));
    }

    #[test]
    fn test_auto_consolidate_below_min_count_no_op() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        // Only one memory — well below min_count=3.
        seed_memory(&db, "auto-ns", "lone", "only");
        let args = auto_args("auto-ns");
        {
            let mut out = env.output();
            run_auto(&db, &args, false, Some("test-agent"), &mut out).unwrap();
        }
        assert!(env.stdout_str().contains("auto-consolidated 0"));
    }

    #[test]
    fn test_auto_consolidate_json_output() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        for i in 0..4 {
            seed_memory(&db, "auto-ns", &format!("t{i}"), &format!("b{i}"));
        }
        let args = auto_args("auto-ns");
        {
            let mut out = env.output();
            run_auto(&db, &args, true, Some("test-agent"), &mut out).unwrap();
        }
        let v: serde_json::Value = serde_json::from_str(env.stdout_str().trim()).unwrap();
        assert!(v["consolidated"].as_u64().is_some());
    }

    // ---------- E1 coverage uplift -----------------------------------
    // Targets: auto_consolidate non-dry-run actual write, dry-run with
    // tag groups, dry-run JSON output, short_only filter, multi-tag
    // membership skipping, default-namespace branch.

    /// Insert a memory with explicit tags. Bypasses the CLI entirely
    /// (the shared `seed_memory` doesn't take tags).
    fn seed_tagged_memory(db: &std::path::Path, ns: &str, title: &str, tags: &[&str]) -> String {
        let conn = db::open(db).expect("db::open");
        let now = chrono::Utc::now().to_rfc3339();
        let mut metadata = crate::models::default_metadata();
        if let Some(obj) = metadata.as_object_mut() {
            obj.insert(
                "agent_id".to_string(),
                serde_json::Value::String("test-agent".to_string()),
            );
        }
        let mem = crate::models::Memory {
            id: uuid::Uuid::new_v4().to_string(),
            tier: crate::models::Tier::Mid,
            namespace: ns.to_string(),
            title: title.to_string(),
            content: format!("body for {title}"),
            tags: tags.iter().map(|t| (*t).to_string()).collect(),
            priority: 5,
            confidence: 1.0,
            source: "test".to_string(),
            access_count: 0,
            created_at: now.clone(),
            updated_at: now,
            last_accessed_at: None,
            expires_at: None,
            metadata,
            reflection_depth: 0,
            memory_kind: crate::models::MemoryKind::Observation,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            confidence_source: crate::models::ConfidenceSource::CallerProvided,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: 1,
        };
        db::insert(&conn, &mem).expect("db::insert")
    }

    #[test]
    fn test_auto_consolidate_persists_untagged_group() {
        // Seed 3 untagged memories — they all land in the `_untagged`
        // tag group which trips the min_count=3 threshold.
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        for i in 0..3 {
            seed_memory(&db, "auto-untag", &format!("u{i}"), &format!("b{i}"));
        }
        let args = auto_args("auto-untag");
        {
            let mut out = env.output();
            run_auto(&db, &args, false, Some("test-agent"), &mut out).unwrap();
        }
        let s = env.stdout_str();
        // 3 memories consolidated (one untagged group at threshold).
        assert!(s.contains("auto-consolidated 3 memories"), "got: {s}");
    }

    #[test]
    fn test_auto_consolidate_dry_run_json_lists_groups() {
        // Hits the `dry_run` + `json_out` branch of run_auto.
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        for i in 0..4 {
            seed_memory(&db, "auto-jdry", &format!("t{i}"), &format!("b{i}"));
        }
        let args = AutoConsolidateArgs {
            dry_run: true,
            ..auto_args("auto-jdry")
        };
        {
            let mut out = env.output();
            run_auto(&db, &args, true, Some("test-agent"), &mut out).unwrap();
        }
        let v: serde_json::Value = serde_json::from_str(env.stdout_str().trim()).unwrap();
        assert!(v["dry_run"].as_bool().unwrap());
        assert!(v["groups"].is_array());
        assert!(!v["groups"].as_array().unwrap().is_empty());
    }

    #[test]
    fn test_auto_consolidate_tagged_groups_dry_run_text() {
        // Each memory is tagged with one of two tags. With min_count=2
        // each tag group is eligible. Dry-run text path lists both.
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        for i in 0..2 {
            seed_tagged_memory(&db, "auto-tag", &format!("alpha-{i}"), &["alpha"]);
            seed_tagged_memory(&db, "auto-tag", &format!("beta-{i}"), &["beta"]);
        }
        let args = AutoConsolidateArgs {
            min_count: 2,
            dry_run: true,
            ..auto_args("auto-tag")
        };
        {
            let mut out = env.output();
            run_auto(&db, &args, false, Some("test-agent"), &mut out).unwrap();
        }
        let s = env.stdout_str();
        assert!(s.contains("dry run"), "expected dry-run header, got: {s}");
        // The text format prints JSON Value::String quoted: `[\"alpha\"]`.
        assert!(
            s.contains("\"alpha\"") || s.contains("\"beta\""),
            "expected tag in output, got: {s}"
        );
    }

    #[test]
    fn test_auto_consolidate_short_only_skips_mid_tier() {
        // Seed mid-tier memories; short_only filter excludes them.
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        for i in 0..4 {
            seed_memory(&db, "auto-short", &format!("s{i}"), &format!("b{i}"));
        }
        let args = AutoConsolidateArgs {
            short_only: true,
            ..auto_args("auto-short")
        };
        {
            let mut out = env.output();
            run_auto(&db, &args, false, Some("test-agent"), &mut out).unwrap();
        }
        // No short-tier rows — count must be 0.
        assert!(env.stdout_str().contains("auto-consolidated 0"));
    }

    #[test]
    fn test_auto_consolidate_no_namespace_walks_all() {
        // Drives the `db::list_namespaces` branch taken when
        // args.namespace is None.
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        for i in 0..3 {
            seed_memory(&db, "auto-nons", &format!("t{i}"), "x");
        }
        let args = AutoConsolidateArgs {
            namespace: None,
            short_only: false,
            min_count: 3,
            dry_run: true,
        };
        {
            let mut out = env.output();
            run_auto(&db, &args, false, Some("test-agent"), &mut out).unwrap();
        }
        assert!(env.stdout_str().contains("dry run"));
    }

    #[test]
    fn test_consolidate_default_namespace_when_none() {
        // Drives `helpers::resolve_namespace(args.namespace)` (#1590)
        // — with no flag and no configured default the namespace
        // bottoms out at whatever `auto_namespace()` yields.
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        // Auto-namespace lookup — accept whatever it returns; the
        // seeded memories live in the same namespace.
        let ns = crate::cli::helpers::auto_namespace();
        let id1 = seed_memory(&db, &ns, "x", "a");
        let id2 = seed_memory(&db, &ns, "y", "b");
        let args = ConsolidateArgs {
            ids: format!("{id1},{id2}"),
            title: "merged".to_string(),
            summary: "summary text".to_string(),
            namespace: None,
        };
        {
            let mut out = env.output();
            run(&db, args, false, Some("test-agent"), &mut out).unwrap();
        }
        assert!(env.stdout_str().contains("consolidated 2 memories"));
    }

    #[test]
    fn test_auto_consolidate_multi_tag_membership_dedupes() {
        // A memory tagged with both `alpha` and `beta` appears in both
        // tag groups: alpha = {a-0, a-1, a-2, shared} (4 members) and
        // beta = {shared, b-0, b-1} (3 members). Once one group has
        // consolidated `shared`, the other group's filter must skip it.
        //
        // Expected totals, depending on which group is visited first:
        //   * alpha first: alpha merges 4, beta shrinks to 2 (< 3) -> 4
        //   * beta first:  beta merges 3, alpha shrinks to 3       -> 6
        // Without the dedup filter both groups would merge in full and
        // the total would be 7 (`shared` counted twice).
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        for i in 0..3 {
            seed_tagged_memory(&db, "auto-multi", &format!("a-{i}"), &["alpha"]);
        }
        // One memory that lives in both groups.
        seed_tagged_memory(&db, "auto-multi", "shared", &["alpha", "beta"]);
        // Two more beta-only memories.
        for i in 0..2 {
            seed_tagged_memory(&db, "auto-multi", &format!("b-{i}"), &["beta"]);
        }
        let args = auto_args("auto-multi");
        {
            let mut out = env.output();
            run_auto(&db, &args, false, Some("test-agent"), &mut out).unwrap();
        }
        let s = env.stdout_str();
        // Accept either visiting order, but never the double-counted 7.
        assert!(
            s.contains("auto-consolidated 4 memories")
                || s.contains("auto-consolidated 6 memories"),
            "expected 4 or 6 consolidated (7 means the overlap was double-counted), got: {s}"
        );
    }
}