// Source: bamboo_memory/memory_store/mod.rs

1use std::collections::{BTreeMap, BTreeSet, HashSet};
2use std::io;
3use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8pub mod freshness;
9pub mod paths;
10pub mod recall;
11pub mod store;
12pub mod types;
13
14pub use freshness::{
15    memory_age_days, memory_age_label, memory_freshness_text, render_memory_freshness_note,
16    FreshnessKind,
17};
18pub use paths::{MemoryPathResolver, SESSIONS_DIR, TOPICS_DIR};
19pub use recall::{
20    select_relevant_memories, shortlist_relevant_memories, MemoryRecallCandidate,
21    MemoryRecallOptions, MemoryRecallRerankContext, MemoryRecallSelection, MemoryRecallStrategy,
22};
23pub use store::MemoryStore;
24pub use types::{
25    BlobScanItem, BlobScanReport, CreatedBy, DuplicateCluster, DuplicateClusterMember,
26    DuplicateScanReport, DurableContentLocation, DurableMemoryDocument, DurableMemoryFrontmatter,
27    DurableMemoryRef, DurableMemoryRelations, DurableMemoryRetrieval, DurableMemorySource,
28    DurableMemoryStatus, DurableMemoryType, MemoryConsolidateResult, MemoryContradictionResult,
29    MemoryDuplicateCandidate, MemoryInspectResult, MemoryMergeResult, MemoryPurgeResult,
30    MemoryQueryCursor, MemoryQueryItem, MemoryQueryOptions, MemoryQueryResult, MemoryScope,
31    MemorySplitPiece, MemorySplitResult, SessionState,
32};
33
/// Memory schema version number.
/// NOTE(review): not referenced in this file — confirm where it is written and checked.
pub const MEMORY_SCHEMA_VERSION: u32 = 1;
/// Default session topic name.
/// NOTE(review): presumably used when no topic is supplied — confirm against callers.
pub const DEFAULT_SESSION_TOPIC: &str = "default";
/// Upper bound on a trimmed session topic; enforced by `validate_session_topic`.
pub const MAX_SESSION_TOPIC_LEN: usize = 50;
/// Upper bound, in characters, on a trimmed memory title; enforced by `validate_memory_title`.
pub const MAX_MEMORY_TAGS_DOC_ANCHOR: () = ();
pub const MAX_MEMORY_TITLE_LEN: usize = 160;
/// Maximum number of distinct normalized tags kept by `normalize_tags`.
pub const MAX_MEMORY_TAGS: usize = 32;
// Query limits. NOTE(review): not used in this file — presumably the default and
// ceiling for result count and returned characters; confirm in the query code.
pub const DEFAULT_QUERY_LIMIT: usize = 5;
pub const MAX_QUERY_LIMIT: usize = 20;
pub const DEFAULT_MAX_CHARS: usize = 3_000;
pub const MAX_MAX_CHARS: usize = 6_000;
// Audit log file names (JSON Lines, judging by the `.jsonl` extension).
pub const WRITE_AUDIT_LOG: &str = "write_audit.jsonl";
pub const MERGE_AUDIT_LOG: &str = "merge_audit.jsonl";
pub const PURGE_AUDIT_LOG: &str = "purge_audit.jsonl";
pub const CONTRADICTION_AUDIT_LOG: &str = "contradiction_audit.jsonl";
// Markdown view file names.
// NOTE(review): presumably the targets of the `build_*_view` functions in this module — confirm.
pub const DREAM_VIEW_FILE: &str = "DREAM_NOTEBOOK.md";
pub const MEMORY_VIEW_FILE: &str = "MEMORY.md";
pub const RECENT_VIEW_FILE: &str = "RECENT.md";
pub const STALE_VIEW_FILE: &str = "STALE.md";
// JSON index file names.
// NOTE(review): presumably one per `*Index` struct defined in this module — confirm.
pub const LEXICAL_INDEX_FILE: &str = "lexical.json";
pub const GRAPH_INDEX_FILE: &str = "graph.json";
pub const RECENT_INDEX_FILE: &str = "recent.json";
pub const STALE_CANDIDATES_INDEX_FILE: &str = "stale_candidates.json";
pub const TAXONOMY_INDEX_FILE: &str = "taxonomy.json";
56
/// Validates a session identifier and returns it with surrounding whitespace removed.
///
/// A valid identifier is non-empty, is made only of ASCII letters, digits,
/// `-`, `_` and `.`, does not contain `..`, and is not the lone component `.`.
///
/// # Errors
///
/// Returns an [`io::ErrorKind::InvalidInput`] error when the identifier is
/// empty, looks like a path (contains `/`, `\`, `..`, or is exactly `.`), or
/// contains any character outside the allowed set.
pub fn validate_session_id(session_id: &str) -> io::Result<&str> {
    let invalid = |message: &'static str| io::Error::new(io::ErrorKind::InvalidInput, message);

    let trimmed = session_id.trim();
    if trimmed.is_empty() {
        return Err(invalid("session_id cannot be empty"));
    }

    // `.` satisfies the character whitelist below, but as a path component it
    // names the current directory, so it is rejected together with `..`.
    let looks_like_path = trimmed == "."
        || trimmed.contains("..")
        || trimmed.contains(|ch: char| ch == '/' || ch == '\\');
    if looks_like_path {
        return Err(invalid("session_id contains invalid path characters"));
    }

    let is_allowed = |ch: char| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.');
    if !trimmed.chars().all(is_allowed) {
        return Err(invalid("session_id contains unsupported characters"));
    }

    Ok(trimmed)
}
82
83pub fn validate_session_topic(topic: &str) -> io::Result<&str> {
84    let trimmed = topic.trim();
85    if trimmed.is_empty() {
86        return Err(io::Error::new(
87            io::ErrorKind::InvalidInput,
88            "topic cannot be empty",
89        ));
90    }
91    if trimmed.len() > MAX_SESSION_TOPIC_LEN {
92        return Err(io::Error::new(
93            io::ErrorKind::InvalidInput,
94            format!(
95                "topic name too long (max {} chars, got {})",
96                MAX_SESSION_TOPIC_LEN,
97                trimmed.len()
98            ),
99        ));
100    }
101    if trimmed.contains('/') || trimmed.contains('\\') || trimmed.contains("..") {
102        return Err(io::Error::new(
103            io::ErrorKind::InvalidInput,
104            "topic contains invalid path characters",
105        ));
106    }
107    if !trimmed
108        .chars()
109        .all(|ch| ch.is_ascii_alphanumeric() || ch == '-' || ch == '_')
110    {
111        return Err(io::Error::new(
112            io::ErrorKind::InvalidInput,
113            "topic must contain only alphanumeric, dash, or underscore characters",
114        ));
115    }
116    Ok(trimmed)
117}
118
119pub fn validate_memory_title(title: &str) -> io::Result<&str> {
120    let trimmed = title.trim();
121    if trimmed.is_empty() {
122        return Err(io::Error::new(
123            io::ErrorKind::InvalidInput,
124            "title cannot be empty",
125        ));
126    }
127    if trimmed.chars().count() > MAX_MEMORY_TITLE_LEN {
128        return Err(io::Error::new(
129            io::ErrorKind::InvalidInput,
130            format!("title too long (max {} chars)", MAX_MEMORY_TITLE_LEN),
131        ));
132    }
133    Ok(trimmed)
134}
135
/// Normalizes a tag into a lowercase, dash-separated slug.
///
/// ASCII letters are lowercased and ASCII digits are kept. The separators
/// `-`, `_`, space, `.` and `/` each become `-`, with consecutive dashes
/// collapsed into one. Every other character is dropped. Leading and trailing
/// dashes are stripped from the result.
///
/// Returns `None` when nothing is left after normalization.
pub fn normalize_tag(tag: &str) -> Option<String> {
    let source = tag.trim();
    let mut slug = String::with_capacity(source.len());

    for ch in source.chars() {
        if ch.is_ascii_alphanumeric() {
            slug.push(ch.to_ascii_lowercase());
        } else if matches!(ch, '-' | '_' | ' ' | '.' | '/') && !slug.ends_with('-') {
            // Dropped characters never reach this branch, so they do not
            // break up a run of separators.
            slug.push('-');
        }
    }

    let slug = slug.trim_matches('-');
    if slug.is_empty() {
        None
    } else {
        Some(slug.to_string())
    }
}
164
165pub fn normalize_tags<I, S>(tags: I) -> Vec<String>
166where
167    I: IntoIterator<Item = S>,
168    S: AsRef<str>,
169{
170    let mut seen = BTreeSet::new();
171    for tag in tags {
172        if let Some(tag) = normalize_tag(tag.as_ref()) {
173            seen.insert(tag);
174            if seen.len() >= MAX_MEMORY_TAGS {
175                break;
176            }
177        }
178    }
179    seen.into_iter().collect()
180}
181
/// Returns at most `max_chars` leading characters of `value`.
///
/// The boolean is `true` when `value` had more than `max_chars` characters
/// and was cut, and `false` when it is returned whole.
pub fn truncate_chars(value: &str, max_chars: usize) -> (String, bool) {
    // The byte offset of the first character past the limit, if there is one,
    // is exactly where the string has to be cut.
    match value.char_indices().nth(max_chars) {
        Some((cut, _)) => (value[..cut].to_string(), true),
        None => (value.to_string(), false),
    }
}
192
/// Counts the Unicode scalar values (`char`s) in `value`, not its bytes.
pub fn count_chars(value: &str) -> usize {
    let scalars = value.chars();
    scalars.count()
}
196
197pub fn now_rfc3339() -> String {
198    Utc::now().to_rfc3339()
199}
200
201pub fn derive_summary(content: &str, max_chars: usize) -> String {
202    let collapsed = content
203        .lines()
204        .map(str::trim)
205        .filter(|line| !line.is_empty())
206        .collect::<Vec<_>>()
207        .join(" ");
208    let (summary, truncated) = truncate_chars(&collapsed, max_chars);
209    if truncated {
210        format!("{}...", summary.trim_end())
211    } else {
212        summary
213    }
214}
215
216pub fn extract_keywords(title: &str, content: &str, tags: &[String]) -> Vec<String> {
217    let mut seen = BTreeSet::new();
218    for tag in tags {
219        if let Some(tag) = normalize_tag(tag) {
220            seen.insert(tag);
221        }
222    }
223
224    let combined = format!("{}\n{}", title, content);
225    let mut current = String::new();
226    for ch in combined.chars() {
227        if ch.is_ascii_alphanumeric() {
228            current.push(ch.to_ascii_lowercase());
229            continue;
230        }
231        if current.len() >= 3 {
232            seen.insert(current.clone());
233        }
234        current.clear();
235    }
236    if current.len() >= 3 {
237        seen.insert(current);
238    }
239
240    seen.into_iter().take(128).collect()
241}
242
/// Extracts up to 64 entity-like tokens from the title and content.
///
/// Text is split on every character that is not an ASCII letter, digit, `-`,
/// `_` or `/`. A token is kept when it is at least three characters long,
/// contains at least one ASCII letter or digit, and either contains an
/// uppercase ASCII letter or one of the separators `-`, `_`, `/`.
///
/// The result preserves the original casing, is de-duplicated, and is sorted
/// ascending; when there are more than 64 tokens the first 64 in that order
/// are kept.
pub fn detect_entities(title: &str, content: &str) -> Vec<String> {
    fn is_separator(ch: char) -> bool {
        matches!(ch, '-' | '_' | '/')
    }
    fn ends_token(ch: char) -> bool {
        !(ch.is_ascii_alphanumeric() || is_separator(ch))
    }

    let entities: BTreeSet<&str> = title
        .split(ends_token)
        .chain(content.split(ends_token))
        .filter(|token| token.len() >= 3)
        // Separator-only runs such as `---`, `___` or `///` (rules, dividers,
        // comment markers) are not entities; because they sort before letters
        // they would otherwise crowd real entities out of the cap below.
        .filter(|token| token.chars().any(|ch| ch.is_ascii_alphanumeric()))
        .filter(|token| {
            token
                .chars()
                .any(|ch| ch.is_ascii_uppercase() || is_separator(ch))
        })
        .collect();

    entities
        .into_iter()
        .take(64)
        .map(|token| token.to_string())
        .collect()
}
260
/// Turns arbitrary text into a lowercase slug made of ASCII letters, digits and dashes.
///
/// Every run of characters other than ASCII letters and digits becomes a
/// single `-`, and the result has no leading or trailing dash. Returns
/// `"unknown"` when the input has no ASCII letter or digit at all.
pub fn sanitize_component(input: &str) -> String {
    let slug = input
        .trim()
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .map(|piece| piece.to_ascii_lowercase())
        .collect::<Vec<_>>()
        .join("-");

    if slug.is_empty() {
        "unknown".to_string()
    } else {
        slug
    }
}
294
295pub fn project_key_from_path(path: &Path) -> String {
296    let canonical = std::fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
297
298    if let Some(root) = find_git_root(&canonical) {
299        if let Some(name) = root.file_name().and_then(|value| value.to_str()) {
300            let mut key = sanitize_component(name);
301            if let Some(hash) =
302                short_stable_hash(&bamboo_infrastructure::paths::path_to_display_string(&root))
303            {
304                key.push('-');
305                key.push_str(&hash);
306            }
307            return key;
308        }
309    }
310
311    if let Some(name) = canonical.file_name().and_then(|value| value.to_str()) {
312        let mut key = sanitize_component(name);
313        if let Some(hash) = short_stable_hash(
314            &bamboo_infrastructure::paths::path_to_display_string(&canonical),
315        ) {
316            key.push('-');
317            key.push_str(&hash);
318        }
319        return key;
320    }
321
322    let raw = bamboo_infrastructure::paths::path_to_display_string(&canonical);
323    format!(
324        "path-{}",
325        short_stable_hash(&raw).unwrap_or_else(|| "unknown".to_string())
326    )
327}
328
/// Finds the nearest directory at or above `start` that contains a `.git` entry.
///
/// The `.git` entry may be a directory or a regular file. Returns `None` when
/// no ancestor has one.
pub fn find_git_root(start: &Path) -> Option<PathBuf> {
    start
        .ancestors()
        .find(|dir| {
            let marker = dir.join(".git");
            marker.is_dir() || marker.is_file()
        })
        .map(|dir| dir.to_path_buf())
}
338
/// Hashes the trimmed input into eight lowercase hexadecimal digits.
///
/// Returns `None` when the input is empty or only whitespace. The digits are
/// the low 32 bits of the standard library's `DefaultHasher` output.
///
/// NOTE(review): the standard library documents `DefaultHasher`'s algorithm
/// as unspecified and subject to change between Rust releases, so the value
/// may not be stable across toolchain upgrades. Switching algorithms would
/// change every existing key, so that needs a migration plan first.
pub fn short_stable_hash(input: &str) -> Option<String> {
    use std::hash::{Hash, Hasher};

    let text = input.trim();
    if text.is_empty() {
        return None;
    }

    let mut state = std::collections::hash_map::DefaultHasher::new();
    text.hash(&mut state);
    let digest = state.finish();
    let low_bits = (digest & u64::from(u32::MAX)) as u32;
    Some(format!("{:08x}", low_bits))
}
350
351pub fn build_yaml_frontmatter(frontmatter: &DurableMemoryFrontmatter) -> io::Result<String> {
352    serde_yaml::to_string(frontmatter).map_err(|error| {
353        io::Error::new(
354            io::ErrorKind::InvalidData,
355            format!("failed to serialize memory frontmatter: {error}"),
356        )
357    })
358}
359
360pub fn parse_markdown_document(content: &str) -> io::Result<(DurableMemoryFrontmatter, String)> {
361    let trimmed = content.trim_start_matches('\u{feff}');
362    let Some(rest) = trimmed.strip_prefix("---\n") else {
363        return Err(io::Error::new(
364            io::ErrorKind::InvalidData,
365            "missing frontmatter start marker",
366        ));
367    };
368    let Some(end_idx) = rest.find("\n---\n") else {
369        return Err(io::Error::new(
370            io::ErrorKind::InvalidData,
371            "missing frontmatter end marker",
372        ));
373    };
374    let yaml = &rest[..end_idx];
375    let body = &rest[end_idx + "\n---\n".len()..];
376    let frontmatter: DurableMemoryFrontmatter = serde_yaml::from_str(yaml).map_err(|error| {
377        io::Error::new(
378            io::ErrorKind::InvalidData,
379            format!("failed to parse memory frontmatter: {error}"),
380        )
381    })?;
382    Ok((frontmatter, body.trim().to_string()))
383}
384
385pub fn render_markdown_document(
386    frontmatter: &DurableMemoryFrontmatter,
387    body: &str,
388) -> io::Result<String> {
389    let yaml = build_yaml_frontmatter(frontmatter)?;
390    Ok(format!("---\n{}---\n\n{}\n", yaml, body.trim()))
391}
392
/// Serializable container for lexical index entries.
///
/// NOTE(review): presumably persisted as `LEXICAL_INDEX_FILE` — confirm in the store code.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LexicalIndex {
    /// When the index was generated (presumably an RFC 3339 timestamp — confirm).
    pub generated_at: String,
    /// One entry per indexed memory.
    pub items: Vec<LexicalIndexItem>,
}

/// Searchable metadata for a single memory inside a [`LexicalIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LexicalIndexItem {
    /// Memory identifier.
    pub id: String,
    /// Memory title.
    pub title: String,
    /// Scope the memory belongs to.
    pub scope: MemoryScope,
    /// Project key, when there is one (presumably only for project scope — confirm).
    pub project_key: Option<String>,
    /// Memory type.
    pub r#type: DurableMemoryType,
    /// Memory status.
    pub status: DurableMemoryStatus,
    /// Tags attached to the memory.
    pub tags: Vec<String>,
    /// Keywords (presumably produced by `extract_keywords` — confirm).
    pub keywords: Vec<String>,
    /// Entities (presumably produced by `detect_entities` — confirm).
    pub entities: Vec<String>,
    /// Last update time, as a string.
    pub updated_at: String,
    /// Creation time, as a string.
    pub created_at: String,
    /// Short summary of the memory body.
    pub summary: String,
}
414
/// Serializable container for recently updated memories.
///
/// NOTE(review): presumably persisted as `RECENT_INDEX_FILE` — confirm in the store code.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RecentIndex {
    /// When the index was generated (presumably an RFC 3339 timestamp — confirm).
    pub generated_at: String,
    /// One entry per listed memory.
    pub items: Vec<RecentIndexItem>,
}

/// A single entry inside a [`RecentIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecentIndexItem {
    /// Memory identifier.
    pub id: String,
    /// Memory title.
    pub title: String,
    /// Last update time, as a string.
    pub updated_at: String,
    /// Last access time, when one has been recorded.
    pub last_accessed_at: Option<String>,
    /// Memory status.
    pub status: DurableMemoryStatus,
}
429
/// Serializable container for relations between memories.
///
/// NOTE(review): presumably persisted as `GRAPH_INDEX_FILE` — confirm in the store code.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct GraphIndex {
    /// When the index was generated (presumably an RFC 3339 timestamp — confirm).
    pub generated_at: String,
    /// One entry per memory.
    pub items: Vec<GraphIndexItem>,
}

/// Relations of a single memory inside a [`GraphIndex`].
///
/// NOTE(review): the lists presumably hold memory identifiers — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphIndexItem {
    /// Memory identifier.
    pub id: String,
    /// Memories related to this one.
    pub related: Vec<String>,
    /// Memories this one supersedes.
    pub supersedes: Vec<String>,
    /// Memories that contradict this one.
    pub contradicted_by: Vec<String>,
}
443
/// Serializable container for memories flagged as stale candidates.
///
/// NOTE(review): presumably persisted as `STALE_CANDIDATES_INDEX_FILE` — confirm in the store code.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StaleCandidatesIndex {
    /// When the index was generated (presumably an RFC 3339 timestamp — confirm).
    pub generated_at: String,
    /// One entry per flagged memory.
    pub items: Vec<StaleCandidateItem>,
}

/// A single entry inside a [`StaleCandidatesIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StaleCandidateItem {
    /// Memory identifier.
    pub id: String,
    /// Memory title.
    pub title: String,
    /// Memory status.
    pub status: DurableMemoryStatus,
    /// Last update time, as a string.
    pub updated_at: String,
    /// Why the memory was flagged.
    pub reason: String,
}
458
/// Serializable counts of memories grouped by type, status and scope.
///
/// NOTE(review): presumably persisted as `TAXONOMY_INDEX_FILE`, with map keys
/// being the `as_str()` names of the respective enums — confirm in the store code.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TaxonomyIndex {
    /// When the index was generated (presumably an RFC 3339 timestamp — confirm).
    pub generated_at: String,
    /// Number of memories per type name.
    pub by_type: BTreeMap<String, usize>,
    /// Number of memories per status name.
    pub by_status: BTreeMap<String, usize>,
    /// Number of memories per scope name.
    pub by_scope: BTreeMap<String, usize>,
    /// Total number of memories counted.
    pub total: usize,
}
467
/// A single serializable audit record.
///
/// NOTE(review): presumably written as one JSON line to the `*_AUDIT_LOG`
/// files — confirm in the store code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditLogEntry {
    /// When the action happened, as a string.
    pub timestamp: String,
    /// Name of the action performed.
    pub action: String,
    /// Scope the action applied to.
    pub scope: MemoryScope,
    /// Affected memory, when there is one.
    pub memory_id: Option<String>,
    /// Session involved, when there is one.
    pub session_id: Option<String>,
    /// Topic involved, when there is one.
    pub topic: Option<String>,
    /// Human-readable description of the action.
    pub summary: String,
    /// Optional free-form details. Left out of the serialized form when
    /// `None`, and defaulted to `None` when absent on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<serde_json::Value>,
}
480
481pub fn parse_rfc3339(value: &str) -> Option<DateTime<Utc>> {
482    chrono::DateTime::parse_from_rfc3339(value)
483        .ok()
484        .map(|dt| dt.with_timezone(&Utc))
485}
486
487pub fn sort_memories_desc(memories: &mut [DurableMemoryDocument]) {
488    memories.sort_by(|left, right| {
489        let left_dt =
490            parse_rfc3339(&left.frontmatter.updated_at).unwrap_or(DateTime::<Utc>::MIN_UTC);
491        let right_dt =
492            parse_rfc3339(&right.frontmatter.updated_at).unwrap_or(DateTime::<Utc>::MIN_UTC);
493        right_dt
494            .cmp(&left_dt)
495            .then_with(|| left.frontmatter.id.cmp(&right.frontmatter.id))
496    });
497}
498
499pub fn match_memory_query(
500    doc: &DurableMemoryDocument,
501    query: Option<&str>,
502    filter_types: Option<&HashSet<DurableMemoryType>>,
503    filter_statuses: Option<&HashSet<DurableMemoryStatus>>,
504) -> Option<f64> {
505    if let Some(types) = filter_types {
506        if !types.contains(&doc.frontmatter.r#type) {
507            return None;
508        }
509    }
510    if let Some(statuses) = filter_statuses {
511        if !statuses.contains(&doc.frontmatter.status) {
512            return None;
513        }
514    }
515
516    let Some(query) = query.map(str::trim).filter(|value| !value.is_empty()) else {
517        return Some(1.0);
518    };
519
520    let query_tokens = extract_keywords(query, "", &[]);
521    if query_tokens.is_empty() {
522        return Some(1.0);
523    }
524
525    let title = doc.frontmatter.title.to_ascii_lowercase();
526    let body = doc.body.to_ascii_lowercase();
527    let keywords: HashSet<String> = doc
528        .frontmatter
529        .retrieval
530        .keywords
531        .iter()
532        .map(|value| value.to_ascii_lowercase())
533        .collect();
534    let tags: HashSet<String> = doc
535        .frontmatter
536        .tags
537        .iter()
538        .map(|value| value.to_ascii_lowercase())
539        .collect();
540    let entities: HashSet<String> = doc
541        .frontmatter
542        .retrieval
543        .entities
544        .iter()
545        .map(|value| value.to_ascii_lowercase())
546        .collect();
547
548    let mut score = 0.0;
549    let mut matched_any = false;
550    for token in &query_tokens {
551        let mut token_score = 0.0;
552        if title.contains(token) {
553            token_score += 3.0;
554        }
555        if keywords.contains(token) {
556            token_score += 2.5;
557        }
558        if tags.contains(token) {
559            token_score += 2.0;
560        }
561        if entities.contains(token) {
562            token_score += 1.5;
563        }
564        if body.contains(token) {
565            token_score += 1.0;
566        }
567        if token_score > 0.0 {
568            matched_any = true;
569            score += token_score;
570        }
571    }
572
573    matched_any.then_some(score / query_tokens.len() as f64)
574}
575
576pub fn build_memory_markdown_view(
577    scope: MemoryScope,
578    project_key: Option<&str>,
579    docs: &[DurableMemoryDocument],
580) -> String {
581    let title = match scope {
582        MemoryScope::Global => "# Bamboo Memory Index (Global)".to_string(),
583        MemoryScope::Project => format!(
584            "# Bamboo Memory Index (Project: {})",
585            project_key.unwrap_or("unknown")
586        ),
587        MemoryScope::Session => "# Bamboo Memory Index (Session)".to_string(),
588    };
589    let mut out = String::new();
590    out.push_str(&title);
591    out.push_str("\n\n");
592    if docs.is_empty() {
593        out.push_str("_(empty)_\n");
594        return out;
595    }
596
597    for doc in docs {
598        out.push_str(&format!(
599            "- `{}` {} [{} / {}] updated {}\n",
600            doc.frontmatter.id,
601            doc.frontmatter.title,
602            doc.frontmatter.r#type.as_str(),
603            doc.frontmatter.status.as_str(),
604            doc.frontmatter.updated_at,
605        ));
606        let summary = derive_summary(&doc.body, 160);
607        if !summary.is_empty() {
608            out.push_str(&format!("  - {}\n", summary));
609        }
610    }
611    out
612}
613
614pub fn build_recent_markdown_view(docs: &[DurableMemoryDocument]) -> String {
615    let mut out = String::from("# Recent Memory Updates\n\n");
616    if docs.is_empty() {
617        out.push_str("_(empty)_\n");
618        return out;
619    }
620    for doc in docs.iter().take(20) {
621        out.push_str(&format!(
622            "- `{}` {} — {}\n",
623            doc.frontmatter.id, doc.frontmatter.title, doc.frontmatter.updated_at
624        ));
625    }
626    out
627}
628
629pub fn build_stale_markdown_view(docs: &[DurableMemoryDocument]) -> String {
630    let mut out = String::from("# Stale Memory Candidates\n\n");
631    let stale: Vec<_> = docs
632        .iter()
633        .filter(|doc| doc.frontmatter.status != DurableMemoryStatus::Active)
634        .collect();
635    if stale.is_empty() {
636        out.push_str("_(no stale items)_\n");
637        return out;
638    }
639    for doc in stale {
640        out.push_str(&format!(
641            "- `{}` {} [{}]\n",
642            doc.frontmatter.id,
643            doc.frontmatter.title,
644            doc.frontmatter.status.as_str()
645        ));
646    }
647    out
648}
649
/// Returns the existing notebook text trimmed, or a placeholder notebook.
///
/// The placeholder is used when `existing` is `None` or contains only whitespace.
pub fn build_dream_view(existing: Option<&str>) -> String {
    let notebook = existing.unwrap_or("").trim();
    if notebook.is_empty() {
        "# Bamboo Dream Notebook\n\n_(empty)_\n".to_string()
    } else {
        notebook.to_string()
    }
}
656
/// Extracts the numeric offset from a query cursor.
///
/// The offset is whatever follows the last `:` in the cursor, or the whole
/// cursor when it has no `:`. Returns `0` when there is no cursor or that
/// part does not parse as a `usize`.
pub fn parse_query_cursor(cursor: Option<&str>) -> usize {
    let Some(raw) = cursor else {
        return 0;
    };

    let offset = match raw.rfind(':') {
        Some(colon) => &raw[colon + 1..],
        None => raw,
    };
    offset.parse::<usize>().unwrap_or(0)
}
663
664pub fn make_query_cursor(scope: MemoryScope, offset: usize) -> String {
665    format!("{}:{}", scope.as_str(), offset)
666}
667
/// Builds a one-sentence description of a query result.
///
/// `items` is the number of memories returned and `total` the number that
/// matched. Despite the name, the result is plain text rather than JSON.
pub fn summary_json(items: usize, total: usize) -> String {
    match total {
        0 => "No matching memories found.".to_string(),
        _ => format!("Returned top {} of {} matching memories.", items, total),
    }
}
675
// Unit tests for the helpers defined in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // Two spellings of the same tag collapse into one entry, `/` becomes a
    // dash, and the result comes back sorted.
    #[test]
    fn normalize_tags_dedupes_and_sanitizes() {
        let tags = normalize_tags(["User Preference", "user-preference", "release/freeze"]);
        assert_eq!(tags, vec!["release-freeze", "user-preference"]);
    }

    // The key starts with the sanitized directory name followed by a dash.
    // NOTE(review): only the prefix is checked, not the hash; the check also
    // assumes no ancestor of the path contains a `.git` entry.
    #[test]
    fn project_key_from_path_is_stable() {
        let key = project_key_from_path(Path::new("/tmp/My Project"));
        assert!(key.starts_with("my-project-"));
    }

    // A document without the leading `---` marker is rejected.
    #[test]
    fn parse_markdown_document_requires_frontmatter() {
        let result = parse_markdown_document("plain body");
        assert!(result.is_err());
    }
}