//! `bamboo_memory/memory_store` — core of the durable memory store: shared
//! constants, validation/normalization helpers, markdown frontmatter
//! (de)serialization, and the serialized index/audit data models.
1use std::collections::{BTreeMap, BTreeSet, HashSet};
2use std::io;
3use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8pub mod freshness;
9pub mod paths;
10pub mod recall;
11pub mod store;
12pub mod types;
13
14pub use freshness::{
15    memory_age_days, memory_age_label, memory_freshness_text, render_memory_freshness_note,
16    FreshnessKind,
17};
18pub use paths::{MemoryPathResolver, SESSIONS_DIR, TOPICS_DIR};
19pub use recall::{
20    select_relevant_memories, shortlist_relevant_memories, MemoryRecallCandidate,
21    MemoryRecallOptions, MemoryRecallRerankContext, MemoryRecallSelection, MemoryRecallStrategy,
22};
23pub use store::MemoryStore;
24pub use types::{
25    CreatedBy, DurableContentLocation, DurableMemoryDocument, DurableMemoryFrontmatter,
26    DurableMemoryRef, DurableMemoryRelations, DurableMemoryRetrieval, DurableMemorySource,
27    DurableMemoryStatus, DurableMemoryType, MemoryContradictionResult, MemoryInspectResult,
28    MemoryMergeResult, MemoryPurgeResult, MemoryQueryCursor, MemoryQueryItem, MemoryQueryOptions,
29    MemoryQueryResult, MemoryScope, SessionState,
30};
31
/// Schema version stamped into durable memory documents; bump on breaking
/// layout changes.
pub const MEMORY_SCHEMA_VERSION: u32 = 1;
/// Topic used when a session does not specify one.
pub const DEFAULT_SESSION_TOPIC: &str = "default";
/// Maximum topic length, enforced by `validate_session_topic`.
pub const MAX_SESSION_TOPIC_LEN: usize = 50;
/// Maximum title length in characters, enforced by `validate_memory_title`.
pub const MAX_MEMORY_TITLE_LEN: usize = 160;
/// Cap on distinct tags kept per memory, enforced by `normalize_tags`.
pub const MAX_MEMORY_TAGS: usize = 32;
// Query paging limits (defaults and hard caps).
pub const DEFAULT_QUERY_LIMIT: usize = 5;
pub const MAX_QUERY_LIMIT: usize = 20;
// Character budgets for returned memory content (default and hard cap).
pub const DEFAULT_MAX_CHARS: usize = 3_000;
pub const MAX_MAX_CHARS: usize = 6_000;
// Append-only audit log file names (JSON Lines, one record per line).
pub const WRITE_AUDIT_LOG: &str = "write_audit.jsonl";
pub const MERGE_AUDIT_LOG: &str = "merge_audit.jsonl";
pub const PURGE_AUDIT_LOG: &str = "purge_audit.jsonl";
pub const CONTRADICTION_AUDIT_LOG: &str = "contradiction_audit.jsonl";
// Rendered human-readable markdown view file names.
pub const DREAM_VIEW_FILE: &str = "DREAM_NOTEBOOK.md";
pub const MEMORY_VIEW_FILE: &str = "MEMORY.md";
pub const RECENT_VIEW_FILE: &str = "RECENT.md";
pub const STALE_VIEW_FILE: &str = "STALE.md";
// Derived machine-readable index file names (JSON).
pub const LEXICAL_INDEX_FILE: &str = "lexical.json";
pub const GRAPH_INDEX_FILE: &str = "graph.json";
pub const RECENT_INDEX_FILE: &str = "recent.json";
pub const STALE_CANDIDATES_INDEX_FILE: &str = "stale_candidates.json";
pub const TAXONOMY_INDEX_FILE: &str = "taxonomy.json";
54
/// Validates and normalizes a session identifier.
///
/// Returns the trimmed id, or an `InvalidInput` error when it is empty,
/// contains path separators or `..`, or uses characters outside ASCII
/// alphanumerics, `-`, `_`, and `.`.
pub fn validate_session_id(session_id: &str) -> io::Result<&str> {
    // Every rejection shares the same error kind; only the message differs.
    fn reject(message: &'static str) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidInput, message)
    }

    let candidate = session_id.trim();
    if candidate.is_empty() {
        return Err(reject("session_id cannot be empty"));
    }
    // The rejected characters are exactly the ones that could escape a
    // directory when the id is embedded in a filesystem path.
    let looks_like_path =
        candidate.contains('/') || candidate.contains('\\') || candidate.contains("..");
    if looks_like_path {
        return Err(reject("session_id contains invalid path characters"));
    }
    let has_unsupported = candidate
        .chars()
        .any(|ch| !(ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.')));
    if has_unsupported {
        return Err(reject("session_id contains unsupported characters"));
    }
    Ok(candidate)
}
80
81pub fn validate_session_topic(topic: &str) -> io::Result<&str> {
82    let trimmed = topic.trim();
83    if trimmed.is_empty() {
84        return Err(io::Error::new(
85            io::ErrorKind::InvalidInput,
86            "topic cannot be empty",
87        ));
88    }
89    if trimmed.len() > MAX_SESSION_TOPIC_LEN {
90        return Err(io::Error::new(
91            io::ErrorKind::InvalidInput,
92            format!(
93                "topic name too long (max {} chars, got {})",
94                MAX_SESSION_TOPIC_LEN,
95                trimmed.len()
96            ),
97        ));
98    }
99    if trimmed.contains('/') || trimmed.contains('\\') || trimmed.contains("..") {
100        return Err(io::Error::new(
101            io::ErrorKind::InvalidInput,
102            "topic contains invalid path characters",
103        ));
104    }
105    if !trimmed
106        .chars()
107        .all(|ch| ch.is_ascii_alphanumeric() || ch == '-' || ch == '_')
108    {
109        return Err(io::Error::new(
110            io::ErrorKind::InvalidInput,
111            "topic must contain only alphanumeric, dash, or underscore characters",
112        ));
113    }
114    Ok(trimmed)
115}
116
117pub fn validate_memory_title(title: &str) -> io::Result<&str> {
118    let trimmed = title.trim();
119    if trimmed.is_empty() {
120        return Err(io::Error::new(
121            io::ErrorKind::InvalidInput,
122            "title cannot be empty",
123        ));
124    }
125    if trimmed.chars().count() > MAX_MEMORY_TITLE_LEN {
126        return Err(io::Error::new(
127            io::ErrorKind::InvalidInput,
128            format!("title too long (max {} chars)", MAX_MEMORY_TITLE_LEN),
129        ));
130    }
131    Ok(trimmed)
132}
133
/// Normalizes a single tag: lowercases ASCII letters, maps separator
/// characters (`-`, `_`, space, `.`, `/`) to a single dash, drops everything
/// else, and strips leading/trailing dashes.
///
/// Returns `None` when nothing usable remains.
pub fn normalize_tag(tag: &str) -> Option<String> {
    let source = tag.trim();
    if source.is_empty() {
        return None;
    }
    let mut result = String::with_capacity(source.len());
    for ch in source.chars() {
        let mapped = if ch.is_ascii_uppercase() {
            ch.to_ascii_lowercase()
        } else if ch.is_ascii_lowercase() || ch.is_ascii_digit() {
            ch
        } else if matches!(ch, '-' | '_' | ' ' | '.' | '/') {
            '-'
        } else {
            // Non-ASCII and punctuation outside the separator set are dropped.
            continue;
        };
        // Collapse runs of separators into a single dash.
        if mapped == '-' && result.ends_with('-') {
            continue;
        }
        result.push(mapped);
    }
    let cleaned = result.trim_matches('-');
    if cleaned.is_empty() {
        None
    } else {
        Some(cleaned.to_string())
    }
}
162
163pub fn normalize_tags<I, S>(tags: I) -> Vec<String>
164where
165    I: IntoIterator<Item = S>,
166    S: AsRef<str>,
167{
168    let mut seen = BTreeSet::new();
169    for tag in tags {
170        if let Some(tag) = normalize_tag(tag.as_ref()) {
171            seen.insert(tag);
172            if seen.len() >= MAX_MEMORY_TAGS {
173                break;
174            }
175        }
176    }
177    seen.into_iter().collect()
178}
179
/// Truncates `value` to at most `max_chars` Unicode scalar values.
///
/// Returns the (possibly shortened) string and a flag that is `true` only
/// when something was actually cut off.
pub fn truncate_chars(value: &str, max_chars: usize) -> (String, bool) {
    let mut chars = value.chars();
    let head: String = chars.by_ref().take(max_chars).collect();
    // If another char remains after taking `max_chars`, we truncated.
    let truncated = chars.next().is_some();
    (head, truncated)
}
190
/// Counts Unicode scalar values in `value` (not bytes).
pub fn count_chars(value: &str) -> usize {
    let mut total = 0usize;
    for _ in value.chars() {
        total += 1;
    }
    total
}
194
195pub fn now_rfc3339() -> String {
196    Utc::now().to_rfc3339()
197}
198
199pub fn derive_summary(content: &str, max_chars: usize) -> String {
200    let collapsed = content
201        .lines()
202        .map(str::trim)
203        .filter(|line| !line.is_empty())
204        .collect::<Vec<_>>()
205        .join(" ");
206    let (summary, truncated) = truncate_chars(&collapsed, max_chars);
207    if truncated {
208        format!("{}...", summary.trim_end())
209    } else {
210        summary
211    }
212}
213
214pub fn extract_keywords(title: &str, content: &str, tags: &[String]) -> Vec<String> {
215    let mut seen = BTreeSet::new();
216    for tag in tags {
217        if let Some(tag) = normalize_tag(tag) {
218            seen.insert(tag);
219        }
220    }
221
222    let combined = format!("{}\n{}", title, content);
223    let mut current = String::new();
224    for ch in combined.chars() {
225        if ch.is_ascii_alphanumeric() {
226            current.push(ch.to_ascii_lowercase());
227            continue;
228        }
229        if current.len() >= 3 {
230            seen.insert(current.clone());
231        }
232        current.clear();
233    }
234    if current.len() >= 3 {
235        seen.insert(current);
236    }
237
238    seen.into_iter().take(128).collect()
239}
240
/// Heuristically extracts "entity-like" tokens from the title and content:
/// tokens of at least 3 bytes that either contain an ASCII uppercase letter
/// (proper-noun-ish) or a `-`/`_`/`/` separator (identifier/path-ish).
/// Returns a sorted, deduplicated list capped at 64 entries.
pub fn detect_entities(title: &str, content: &str) -> Vec<String> {
    let text = format!("{}\n{}", title, content);
    let is_boundary =
        |ch: char| !(ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '/'));
    let mut found = BTreeSet::new();
    for raw in text.split(is_boundary) {
        let token = raw.trim();
        if token.len() < 3 {
            continue;
        }
        let looks_proper = token.chars().any(|ch| ch.is_ascii_uppercase());
        let looks_structured = ['-', '_', '/'].iter().any(|sep| token.contains(*sep));
        if looks_proper || looks_structured {
            found.insert(token.to_owned());
        }
    }
    found.into_iter().take(64).collect()
}
258
/// Sanitizes an arbitrary string into a filesystem-friendly component:
/// ASCII alphanumerics are lowercased, every other character becomes a dash,
/// dash runs are collapsed, and leading/trailing dashes are stripped.
/// Falls back to `"unknown"` when nothing usable remains.
pub fn sanitize_component(input: &str) -> String {
    const FALLBACK: &str = "unknown";

    let source = input.trim();
    if source.is_empty() {
        return FALLBACK.to_string();
    }

    let mut result = String::with_capacity(source.len());
    for ch in source.chars() {
        let mapped = if ch.is_ascii_alphanumeric() {
            ch.to_ascii_lowercase()
        } else {
            '-'
        };
        // Collapse consecutive dashes.
        if mapped == '-' && result.ends_with('-') {
            continue;
        }
        result.push(mapped);
    }

    let cleaned = result.trim_matches('-');
    if cleaned.is_empty() {
        FALLBACK.to_string()
    } else {
        cleaned.to_string()
    }
}
292
293pub fn project_key_from_path(path: &Path) -> String {
294    let canonical = std::fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
295
296    if let Some(root) = find_git_root(&canonical) {
297        if let Some(name) = root.file_name().and_then(|value| value.to_str()) {
298            let mut key = sanitize_component(name);
299            if let Some(hash) =
300                short_stable_hash(&bamboo_infrastructure::paths::path_to_display_string(&root))
301            {
302                key.push('-');
303                key.push_str(&hash);
304            }
305            return key;
306        }
307    }
308
309    if let Some(name) = canonical.file_name().and_then(|value| value.to_str()) {
310        let mut key = sanitize_component(name);
311        if let Some(hash) = short_stable_hash(
312            &bamboo_infrastructure::paths::path_to_display_string(&canonical),
313        ) {
314            key.push('-');
315            key.push_str(&hash);
316        }
317        return key;
318    }
319
320    let raw = bamboo_infrastructure::paths::path_to_display_string(&canonical);
321    format!(
322        "path-{}",
323        short_stable_hash(&raw).unwrap_or_else(|| "unknown".to_string())
324    )
325}
326
/// Walks up from `start` and returns the first ancestor containing a `.git`
/// entry (directory for normal checkouts, file for worktrees/submodules).
pub fn find_git_root(start: &Path) -> Option<PathBuf> {
    start
        .ancestors()
        .find(|ancestor| {
            let marker = ancestor.join(".git");
            marker.is_dir() || marker.is_file()
        })
        .map(Path::to_path_buf)
}
336
/// Hashes the trimmed input into an 8-hex-digit string; `None` for
/// empty/whitespace-only input. Deterministic within a build.
///
/// NOTE(review): `DefaultHasher`'s algorithm is not guaranteed to stay the
/// same across Rust releases, so keys derived from it could shift after a
/// toolchain upgrade — confirm whether persisted project keys depend on this.
pub fn short_stable_hash(input: &str) -> Option<String> {
    use std::hash::{Hash, Hasher};

    let key = input.trim();
    if key.is_empty() {
        return None;
    }
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    key.hash(&mut hasher);
    // Keep only the low 32 bits so the key suffix stays short.
    let low_bits = (hasher.finish() & 0xffff_ffff) as u32;
    Some(format!("{low_bits:08x}"))
}
348
349pub fn build_yaml_frontmatter(frontmatter: &DurableMemoryFrontmatter) -> io::Result<String> {
350    serde_yaml::to_string(frontmatter).map_err(|error| {
351        io::Error::new(
352            io::ErrorKind::InvalidData,
353            format!("failed to serialize memory frontmatter: {error}"),
354        )
355    })
356}
357
358pub fn parse_markdown_document(content: &str) -> io::Result<(DurableMemoryFrontmatter, String)> {
359    let trimmed = content.trim_start_matches('\u{feff}');
360    let Some(rest) = trimmed.strip_prefix("---\n") else {
361        return Err(io::Error::new(
362            io::ErrorKind::InvalidData,
363            "missing frontmatter start marker",
364        ));
365    };
366    let Some(end_idx) = rest.find("\n---\n") else {
367        return Err(io::Error::new(
368            io::ErrorKind::InvalidData,
369            "missing frontmatter end marker",
370        ));
371    };
372    let yaml = &rest[..end_idx];
373    let body = &rest[end_idx + "\n---\n".len()..];
374    let frontmatter: DurableMemoryFrontmatter = serde_yaml::from_str(yaml).map_err(|error| {
375        io::Error::new(
376            io::ErrorKind::InvalidData,
377            format!("failed to parse memory frontmatter: {error}"),
378        )
379    })?;
380    Ok((frontmatter, body.trim().to_string()))
381}
382
383pub fn render_markdown_document(
384    frontmatter: &DurableMemoryFrontmatter,
385    body: &str,
386) -> io::Result<String> {
387    let yaml = build_yaml_frontmatter(frontmatter)?;
388    Ok(format!("---\n{}---\n\n{}\n", yaml, body.trim()))
389}
390
/// Serialized lexical search index (see `LEXICAL_INDEX_FILE`): one item per
/// memory document carrying the fields used for keyword matching.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LexicalIndex {
    pub generated_at: String,
    pub items: Vec<LexicalIndexItem>,
}

/// One memory document's entry in the lexical index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LexicalIndexItem {
    pub id: String,
    pub title: String,
    pub scope: MemoryScope,
    pub project_key: Option<String>,
    // `type` is a Rust keyword, hence the raw identifier.
    pub r#type: DurableMemoryType,
    pub status: DurableMemoryStatus,
    pub tags: Vec<String>,
    pub keywords: Vec<String>,
    pub entities: Vec<String>,
    pub updated_at: String,
    pub created_at: String,
    pub summary: String,
}
412
/// Serialized recency index (see `RECENT_INDEX_FILE`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RecentIndex {
    pub generated_at: String,
    pub items: Vec<RecentIndexItem>,
}

/// One memory document's entry in the recency index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecentIndexItem {
    pub id: String,
    pub title: String,
    pub updated_at: String,
    // None when the memory has never been read back.
    pub last_accessed_at: Option<String>,
    pub status: DurableMemoryStatus,
}
427
/// Serialized relation graph between memories (see `GRAPH_INDEX_FILE`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct GraphIndex {
    pub generated_at: String,
    pub items: Vec<GraphIndexItem>,
}

/// Outgoing relation edges for a single memory, keyed by memory ids.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphIndexItem {
    pub id: String,
    pub related: Vec<String>,
    pub supersedes: Vec<String>,
    pub contradicted_by: Vec<String>,
}
441
/// Serialized list of memories flagged as candidates for staleness review
/// (see `STALE_CANDIDATES_INDEX_FILE`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StaleCandidatesIndex {
    pub generated_at: String,
    pub items: Vec<StaleCandidateItem>,
}

/// One stale-candidate entry with a human-readable reason for flagging.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StaleCandidateItem {
    pub id: String,
    pub title: String,
    pub status: DurableMemoryStatus,
    pub updated_at: String,
    pub reason: String,
}
456
/// Serialized aggregate counts over memories, bucketed by type, status, and
/// scope (see `TAXONOMY_INDEX_FILE`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TaxonomyIndex {
    pub generated_at: String,
    pub by_type: BTreeMap<String, usize>,
    pub by_status: BTreeMap<String, usize>,
    pub by_scope: BTreeMap<String, usize>,
    // Total document count across all buckets.
    pub total: usize,
}
465
/// A single audit-log record, serialized as one JSON line (see the
/// `*_AUDIT_LOG` file name constants).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditLogEntry {
    pub timestamp: String,
    pub action: String,
    pub scope: MemoryScope,
    pub memory_id: Option<String>,
    pub session_id: Option<String>,
    pub topic: Option<String>,
    pub summary: String,
    // Free-form extra payload; omitted from serialized output when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<serde_json::Value>,
}
478
479pub fn parse_rfc3339(value: &str) -> Option<DateTime<Utc>> {
480    chrono::DateTime::parse_from_rfc3339(value)
481        .ok()
482        .map(|dt| dt.with_timezone(&Utc))
483}
484
485pub fn sort_memories_desc(memories: &mut [DurableMemoryDocument]) {
486    memories.sort_by(|left, right| {
487        let left_dt =
488            parse_rfc3339(&left.frontmatter.updated_at).unwrap_or(DateTime::<Utc>::MIN_UTC);
489        let right_dt =
490            parse_rfc3339(&right.frontmatter.updated_at).unwrap_or(DateTime::<Utc>::MIN_UTC);
491        right_dt
492            .cmp(&left_dt)
493            .then_with(|| left.frontmatter.id.cmp(&right.frontmatter.id))
494    });
495}
496
/// Scores `doc` against an optional free-text query and optional type/status
/// filters.
///
/// Returns `None` when the document fails a filter or matches no query token;
/// otherwise `Some(score)`. With no query (or a query that yields no tokens)
/// every document that passes the filters scores a flat `1.0`.
///
/// Per-token weights: title substring 3.0, exact keyword 2.5, exact tag 2.0,
/// exact entity 1.5, body substring 1.0. The final score is the weighted sum
/// averaged over the number of query tokens.
pub fn match_memory_query(
    doc: &DurableMemoryDocument,
    query: Option<&str>,
    filter_types: Option<&HashSet<DurableMemoryType>>,
    filter_statuses: Option<&HashSet<DurableMemoryStatus>>,
) -> Option<f64> {
    // Hard filters first: a mismatch excludes the document outright.
    if let Some(types) = filter_types {
        if !types.contains(&doc.frontmatter.r#type) {
            return None;
        }
    }
    if let Some(statuses) = filter_statuses {
        if !statuses.contains(&doc.frontmatter.status) {
            return None;
        }
    }

    // No usable query text: filters alone decide, with a neutral score.
    let Some(query) = query.map(str::trim).filter(|value| !value.is_empty()) else {
        return Some(1.0);
    };

    // Tokenize the query the same way document keywords are extracted.
    let query_tokens = extract_keywords(query, "", &[]);
    if query_tokens.is_empty() {
        return Some(1.0);
    }

    // Lowercase everything once so token matching is case-insensitive.
    let title = doc.frontmatter.title.to_ascii_lowercase();
    let body = doc.body.to_ascii_lowercase();
    let keywords: HashSet<String> = doc
        .frontmatter
        .retrieval
        .keywords
        .iter()
        .map(|value| value.to_ascii_lowercase())
        .collect();
    let tags: HashSet<String> = doc
        .frontmatter
        .tags
        .iter()
        .map(|value| value.to_ascii_lowercase())
        .collect();
    let entities: HashSet<String> = doc
        .frontmatter
        .retrieval
        .entities
        .iter()
        .map(|value| value.to_ascii_lowercase())
        .collect();

    let mut score = 0.0;
    let mut matched_any = false;
    for token in &query_tokens {
        let mut token_score = 0.0;
        // Title/body are substring matches; keyword/tag/entity are exact.
        if title.contains(token) {
            token_score += 3.0;
        }
        if keywords.contains(token) {
            token_score += 2.5;
        }
        if tags.contains(token) {
            token_score += 2.0;
        }
        if entities.contains(token) {
            token_score += 1.5;
        }
        if body.contains(token) {
            token_score += 1.0;
        }
        if token_score > 0.0 {
            matched_any = true;
            score += token_score;
        }
    }

    // Average over token count so long queries don't inflate scores.
    matched_any.then_some(score / query_tokens.len() as f64)
}
573
574pub fn build_memory_markdown_view(
575    scope: MemoryScope,
576    project_key: Option<&str>,
577    docs: &[DurableMemoryDocument],
578) -> String {
579    let title = match scope {
580        MemoryScope::Global => "# Bamboo Memory Index (Global)".to_string(),
581        MemoryScope::Project => format!(
582            "# Bamboo Memory Index (Project: {})",
583            project_key.unwrap_or("unknown")
584        ),
585        MemoryScope::Session => "# Bamboo Memory Index (Session)".to_string(),
586    };
587    let mut out = String::new();
588    out.push_str(&title);
589    out.push_str("\n\n");
590    if docs.is_empty() {
591        out.push_str("_(empty)_\n");
592        return out;
593    }
594
595    for doc in docs {
596        out.push_str(&format!(
597            "- `{}` {} [{} / {}] updated {}\n",
598            doc.frontmatter.id,
599            doc.frontmatter.title,
600            doc.frontmatter.r#type.as_str(),
601            doc.frontmatter.status.as_str(),
602            doc.frontmatter.updated_at,
603        ));
604        let summary = derive_summary(&doc.body, 160);
605        if !summary.is_empty() {
606            out.push_str(&format!("  - {}\n", summary));
607        }
608    }
609    out
610}
611
612pub fn build_recent_markdown_view(docs: &[DurableMemoryDocument]) -> String {
613    let mut out = String::from("# Recent Memory Updates\n\n");
614    if docs.is_empty() {
615        out.push_str("_(empty)_\n");
616        return out;
617    }
618    for doc in docs.iter().take(20) {
619        out.push_str(&format!(
620            "- `{}` {} — {}\n",
621            doc.frontmatter.id, doc.frontmatter.title, doc.frontmatter.updated_at
622        ));
623    }
624    out
625}
626
627pub fn build_stale_markdown_view(docs: &[DurableMemoryDocument]) -> String {
628    let mut out = String::from("# Stale Memory Candidates\n\n");
629    let stale: Vec<_> = docs
630        .iter()
631        .filter(|doc| doc.frontmatter.status != DurableMemoryStatus::Active)
632        .collect();
633    if stale.is_empty() {
634        out.push_str("_(no stale items)_\n");
635        return out;
636    }
637    for doc in stale {
638        out.push_str(&format!(
639            "- `{}` {} [{}]\n",
640            doc.frontmatter.id,
641            doc.frontmatter.title,
642            doc.frontmatter.status.as_str()
643        ));
644    }
645    out
646}
647
/// Returns the existing dream-notebook content (trimmed), or the default
/// empty-notebook template when there is none.
pub fn build_dream_view(existing: Option<&str>) -> String {
    const EMPTY_NOTEBOOK: &str = "# Bamboo Dream Notebook\n\n_(empty)_\n";
    let content = existing.map(str::trim).unwrap_or("");
    if content.is_empty() {
        EMPTY_NOTEBOOK.to_string()
    } else {
        content.to_string()
    }
}
654
/// Extracts the numeric offset from a `"<scope>:<offset>"` cursor string;
/// returns 0 for `None` or anything that does not end in a number.
pub fn parse_query_cursor(cursor: Option<&str>) -> usize {
    match cursor {
        Some(raw) => raw
            .rsplit(':')
            .next()
            .and_then(|tail| tail.parse::<usize>().ok())
            .unwrap_or(0),
        None => 0,
    }
}
661
662pub fn make_query_cursor(scope: MemoryScope, offset: usize) -> String {
663    format!("{}:{}", scope.as_str(), offset)
664}
665
/// Builds the human-readable summary line for a query result: a "not found"
/// message when there were no matches, otherwise a "top N of M" sentence.
pub fn summary_json(items: usize, total: usize) -> String {
    match total {
        0 => "No matching memories found.".to_string(),
        _ => format!("Returned top {items} of {total} matching memories."),
    }
}
673
#[cfg(test)]
mod tests {
    use super::*;

    // `normalize_tags` lowercases, maps separators to dashes, and dedupes;
    // the BTreeSet makes the output sorted.
    #[test]
    fn normalize_tags_dedupes_and_sanitizes() {
        let tags = normalize_tags(["User Preference", "user-preference", "release/freeze"]);
        assert_eq!(tags, vec!["release-freeze", "user-preference"]);
    }

    // The project key is the sanitized directory name plus a hash suffix.
    #[test]
    fn project_key_from_path_is_stable() {
        let key = project_key_from_path(Path::new("/tmp/My Project"));
        assert!(key.starts_with("my-project-"));
    }

    // Documents without a leading `---` frontmatter block are rejected.
    #[test]
    fn parse_markdown_document_requires_frontmatter() {
        let result = parse_markdown_document("plain body");
        assert!(result.is_err());
    }
}