1use std::collections::{BTreeMap, BTreeSet, HashSet};
2use std::io;
3use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8pub mod freshness;
9pub mod paths;
10pub mod recall;
11pub mod store;
12pub mod types;
13
14pub use freshness::{
15 memory_age_days, memory_age_label, memory_freshness_text, render_memory_freshness_note,
16 FreshnessKind,
17};
18pub use paths::{MemoryPathResolver, SESSIONS_DIR, TOPICS_DIR};
19pub use recall::{
20 select_relevant_memories, shortlist_relevant_memories, MemoryRecallCandidate,
21 MemoryRecallOptions, MemoryRecallRerankContext, MemoryRecallSelection, MemoryRecallStrategy,
22};
23pub use store::MemoryStore;
24pub use types::{
25 BlobScanItem, BlobScanReport, CreatedBy, DuplicateCluster, DuplicateClusterMember,
26 DuplicateScanReport, DurableContentLocation, DurableMemoryDocument, DurableMemoryFrontmatter,
27 DurableMemoryRef, DurableMemoryRelations, DurableMemoryRetrieval, DurableMemorySource,
28 DurableMemoryStatus, DurableMemoryType, MemoryConsolidateResult, MemoryContradictionResult,
29 MemoryDuplicateCandidate, MemoryInspectResult, MemoryMergeResult, MemoryPurgeResult,
30 MemoryQueryCursor, MemoryQueryItem, MemoryQueryOptions, MemoryQueryResult, MemoryScope,
31 MemorySplitPiece, MemorySplitResult, SessionState,
32};
33
/// Schema version number for memory data.
// NOTE(review): where this is written or checked is not visible in this file — confirm in `store`.
pub const MEMORY_SCHEMA_VERSION: u32 = 1;
/// Topic name `"default"`; presumably used when no explicit session topic is given — confirm with callers.
pub const DEFAULT_SESSION_TOPIC: &str = "default";
/// Maximum topic length accepted by `validate_session_topic`.
pub const MAX_SESSION_TOPIC_LEN: usize = 50;
/// Maximum title length, counted in `char`s, accepted by `validate_memory_title`.
pub const MAX_MEMORY_TITLE_LEN: usize = 160;
/// Number of distinct normalized tags at which `normalize_tags` stops collecting.
pub const MAX_MEMORY_TAGS: usize = 32;
// The four limits below are not referenced in this file; presumably they bound
// query result counts and returned character budgets — confirm in `store`/`recall`.
pub const DEFAULT_QUERY_LIMIT: usize = 5;
pub const MAX_QUERY_LIMIT: usize = 20;
pub const DEFAULT_MAX_CHARS: usize = 3_000;
pub const MAX_MAX_CHARS: usize = 6_000;
// File names of JSON-lines audit logs (one per action family, judging by the names).
// They are only declared here; the writers live outside this file.
pub const WRITE_AUDIT_LOG: &str = "write_audit.jsonl";
pub const MERGE_AUDIT_LOG: &str = "merge_audit.jsonl";
pub const PURGE_AUDIT_LOG: &str = "purge_audit.jsonl";
pub const CONTRADICTION_AUDIT_LOG: &str = "contradiction_audit.jsonl";
// File names of markdown views. Presumably the outputs of the `build_*_view`
// functions in this file are written to them — confirm against the store module.
pub const DREAM_VIEW_FILE: &str = "DREAM_NOTEBOOK.md";
pub const MEMORY_VIEW_FILE: &str = "MEMORY.md";
pub const RECENT_VIEW_FILE: &str = "RECENT.md";
pub const STALE_VIEW_FILE: &str = "STALE.md";
// File names of JSON index files. Presumably they hold the serialized `*Index`
// structs declared in this file — confirm against the store module.
pub const LEXICAL_INDEX_FILE: &str = "lexical.json";
pub const GRAPH_INDEX_FILE: &str = "graph.json";
pub const RECENT_INDEX_FILE: &str = "recent.json";
pub const STALE_CANDIDATES_INDEX_FILE: &str = "stale_candidates.json";
pub const TAXONOMY_INDEX_FILE: &str = "taxonomy.json";
56
/// Validates a session identifier and returns it with surrounding whitespace removed.
///
/// An accepted identifier is non-empty and made only of ASCII letters, digits,
/// `-`, `_` and `.`. Identifiers that look like path navigation are refused:
/// anything containing `/`, `\` or `..`, and the bare `.` component.
///
/// # Errors
///
/// Returns an [`io::Error`] of kind [`io::ErrorKind::InvalidInput`] when the
/// identifier is empty, path-like, or contains an unsupported character.
pub fn validate_session_id(session_id: &str) -> io::Result<&str> {
    let invalid = |reason: &str| io::Error::new(io::ErrorKind::InvalidInput, reason);

    let trimmed = session_id.trim();
    if trimmed.is_empty() {
        return Err(invalid("session_id cannot be empty"));
    }

    // A lone `.` satisfies the character whitelist below, but as a path
    // component it names the containing directory itself, so it is rejected
    // together with the other path-like inputs.
    let path_like = trimmed == "."
        || trimmed.contains("..")
        || trimmed.contains('/')
        || trimmed.contains('\\');
    if path_like {
        return Err(invalid("session_id contains invalid path characters"));
    }

    let supported = trimmed
        .chars()
        .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'));
    if !supported {
        return Err(invalid("session_id contains unsupported characters"));
    }

    Ok(trimmed)
}
82
83pub fn validate_session_topic(topic: &str) -> io::Result<&str> {
84 let trimmed = topic.trim();
85 if trimmed.is_empty() {
86 return Err(io::Error::new(
87 io::ErrorKind::InvalidInput,
88 "topic cannot be empty",
89 ));
90 }
91 if trimmed.len() > MAX_SESSION_TOPIC_LEN {
92 return Err(io::Error::new(
93 io::ErrorKind::InvalidInput,
94 format!(
95 "topic name too long (max {} chars, got {})",
96 MAX_SESSION_TOPIC_LEN,
97 trimmed.len()
98 ),
99 ));
100 }
101 if trimmed.contains('/') || trimmed.contains('\\') || trimmed.contains("..") {
102 return Err(io::Error::new(
103 io::ErrorKind::InvalidInput,
104 "topic contains invalid path characters",
105 ));
106 }
107 if !trimmed
108 .chars()
109 .all(|ch| ch.is_ascii_alphanumeric() || ch == '-' || ch == '_')
110 {
111 return Err(io::Error::new(
112 io::ErrorKind::InvalidInput,
113 "topic must contain only alphanumeric, dash, or underscore characters",
114 ));
115 }
116 Ok(trimmed)
117}
118
119pub fn validate_memory_title(title: &str) -> io::Result<&str> {
120 let trimmed = title.trim();
121 if trimmed.is_empty() {
122 return Err(io::Error::new(
123 io::ErrorKind::InvalidInput,
124 "title cannot be empty",
125 ));
126 }
127 if trimmed.chars().count() > MAX_MEMORY_TITLE_LEN {
128 return Err(io::Error::new(
129 io::ErrorKind::InvalidInput,
130 format!("title too long (max {} chars)", MAX_MEMORY_TITLE_LEN),
131 ));
132 }
133 Ok(trimmed)
134}
135
/// Converts a raw tag into a lowercase, dash-separated slug.
///
/// ASCII letters are lowercased and digits are kept. `-`, `_`, space, `.` and
/// `/` each become `-`, with consecutive dashes collapsed into one. Every other
/// character is dropped without breaking a run of dashes. Leading and trailing
/// dashes are removed.
///
/// Returns `None` when nothing remains after normalization.
pub fn normalize_tag(tag: &str) -> Option<String> {
    let source = tag.trim();
    let mut slug = String::with_capacity(source.len());

    for ch in source.chars() {
        let mapped = if ch.is_ascii_alphanumeric() {
            ch.to_ascii_lowercase()
        } else if matches!(ch, '-' | '_' | ' ' | '.' | '/') {
            '-'
        } else {
            // Unsupported characters vanish entirely.
            continue;
        };

        // Never emit two dashes in a row.
        if mapped == '-' && slug.ends_with('-') {
            continue;
        }
        slug.push(mapped);
    }

    let slug = slug.trim_matches('-');
    if slug.is_empty() {
        None
    } else {
        Some(slug.to_string())
    }
}
164
165pub fn normalize_tags<I, S>(tags: I) -> Vec<String>
166where
167 I: IntoIterator<Item = S>,
168 S: AsRef<str>,
169{
170 let mut seen = BTreeSet::new();
171 for tag in tags {
172 if let Some(tag) = normalize_tag(tag.as_ref()) {
173 seen.insert(tag);
174 if seen.len() >= MAX_MEMORY_TAGS {
175 break;
176 }
177 }
178 }
179 seen.into_iter().collect()
180}
181
/// Returns at most the first `max_chars` characters of `value`.
///
/// The boolean is `true` when `value` had more than `max_chars` characters and
/// was cut, `false` when it is returned whole. Cutting happens on `char`
/// boundaries, so multi-byte characters are never split.
pub fn truncate_chars(value: &str, max_chars: usize) -> (String, bool) {
    // The byte offset of the character at index `max_chars` is exactly where
    // the kept prefix ends. If there is no such character, nothing is cut.
    match value.char_indices().nth(max_chars) {
        Some((cut_at, _)) => (value[..cut_at].to_string(), true),
        None => (value.to_string(), false),
    }
}
192
/// Returns the number of `char`s (Unicode scalar values) in `value`.
///
/// This is not the byte length: a multi-byte character counts as one.
pub fn count_chars(value: &str) -> usize {
    let scalar_values = value.chars();
    scalar_values.count()
}
196
197pub fn now_rfc3339() -> String {
198 Utc::now().to_rfc3339()
199}
200
201pub fn derive_summary(content: &str, max_chars: usize) -> String {
202 let collapsed = content
203 .lines()
204 .map(str::trim)
205 .filter(|line| !line.is_empty())
206 .collect::<Vec<_>>()
207 .join(" ");
208 let (summary, truncated) = truncate_chars(&collapsed, max_chars);
209 if truncated {
210 format!("{}...", summary.trim_end())
211 } else {
212 summary
213 }
214}
215
216pub fn extract_keywords(title: &str, content: &str, tags: &[String]) -> Vec<String> {
217 let mut seen = BTreeSet::new();
218 for tag in tags {
219 if let Some(tag) = normalize_tag(tag) {
220 seen.insert(tag);
221 }
222 }
223
224 let combined = format!("{}\n{}", title, content);
225 let mut current = String::new();
226 for ch in combined.chars() {
227 if ch.is_ascii_alphanumeric() {
228 current.push(ch.to_ascii_lowercase());
229 continue;
230 }
231 if current.len() >= 3 {
232 seen.insert(current.clone());
233 }
234 current.clear();
235 }
236 if current.len() >= 3 {
237 seen.insert(current);
238 }
239
240 seen.into_iter().take(128).collect()
241}
242
/// Heuristically extracts entity-like tokens from a title and content.
///
/// The text is split on every character that is not an ASCII letter, digit,
/// `-`, `_` or `/`. A token is kept when it
///
/// * is at least three characters long,
/// * contains at least one ASCII letter or digit, and
/// * contains an uppercase ASCII letter or one of the separators `-`, `_`, `/`.
///
/// The alphanumeric requirement keeps pure punctuation runs such as markdown
/// rules (`---`), underscore fences (`___`) or `///` from being reported as
/// entities. Tokens keep their original case. At most the first 64 entries in
/// ascending string order are returned.
pub fn detect_entities(title: &str, content: &str) -> Vec<String> {
    let is_break =
        |ch: char| !(ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' || ch == '/');

    let mut entities = BTreeSet::new();
    // Scanning title and content separately equals scanning them joined by a
    // newline, since the newline is a break character.
    for token in title.split(is_break).chain(content.split(is_break)) {
        if token.len() < 3 {
            continue;
        }
        // Separator-only tokens carry no name and are skipped.
        if !token.chars().any(|ch| ch.is_ascii_alphanumeric()) {
            continue;
        }
        let has_upper = token.chars().any(|ch| ch.is_ascii_uppercase());
        let has_separator = token.contains(|ch: char| matches!(ch, '-' | '_' | '/'));
        if has_upper || has_separator {
            entities.insert(token.to_string());
        }
    }

    entities.into_iter().take(64).collect()
}
260
/// Turns arbitrary text into a lowercase slug of ASCII letters, digits and `-`.
///
/// Every run of characters other than ASCII letters and digits becomes a
/// single `-`, letters are lowercased, and leading or trailing dashes are
/// dropped. Returns `"unknown"` when no letter or digit survives.
pub fn sanitize_component(input: &str) -> String {
    // Splitting on every non-alphanumeric character and re-joining the
    // non-empty pieces collapses separator runs and trims the edges at once.
    let slug = input
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<_>>()
        .join("-");

    if slug.is_empty() {
        return "unknown".to_string();
    }
    slug
}
294
295pub fn project_key_from_path(path: &Path) -> String {
296 let canonical = std::fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
297
298 if let Some(root) = find_git_root(&canonical) {
299 if let Some(name) = root.file_name().and_then(|value| value.to_str()) {
300 let mut key = sanitize_component(name);
301 if let Some(hash) =
302 short_stable_hash(&bamboo_infrastructure::paths::path_to_display_string(&root))
303 {
304 key.push('-');
305 key.push_str(&hash);
306 }
307 return key;
308 }
309 }
310
311 if let Some(name) = canonical.file_name().and_then(|value| value.to_str()) {
312 let mut key = sanitize_component(name);
313 if let Some(hash) = short_stable_hash(
314 &bamboo_infrastructure::paths::path_to_display_string(&canonical),
315 ) {
316 key.push('-');
317 key.push_str(&hash);
318 }
319 return key;
320 }
321
322 let raw = bamboo_infrastructure::paths::path_to_display_string(&canonical);
323 format!(
324 "path-{}",
325 short_stable_hash(&raw).unwrap_or_else(|| "unknown".to_string())
326 )
327}
328
/// Finds the nearest directory at or above `start` that contains a `.git` entry.
///
/// `start` and each of its ancestors are checked in order. The `.git` entry
/// may be a directory or a regular file. Returns `None` when no ancestor
/// qualifies.
pub fn find_git_root(start: &Path) -> Option<PathBuf> {
    start
        .ancestors()
        .find(|dir| {
            let marker = dir.join(".git");
            marker.is_dir() || marker.is_file()
        })
        .map(Path::to_path_buf)
}
338
/// Returns an 8-digit lowercase hexadecimal hash of the trimmed input.
///
/// Returns `None` when the input is empty after trimming. The value is the low
/// 32 bits of the standard library's `DefaultHasher` output.
///
/// NOTE(review): `DefaultHasher`'s algorithm is documented as unspecified and
/// not to be relied upon across Rust releases. If these hashes are persisted
/// (for example inside project keys), a toolchain upgrade could change them —
/// confirm whether that matters before relying on long-term stability.
pub fn short_stable_hash(input: &str) -> Option<String> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let key = input.trim();
    if key.is_empty() {
        return None;
    }

    let mut state = DefaultHasher::new();
    key.hash(&mut state);
    // The truncating cast keeps exactly the low 32 bits.
    let low_bits = state.finish() as u32;
    Some(format!("{low_bits:08x}"))
}
350
351pub fn build_yaml_frontmatter(frontmatter: &DurableMemoryFrontmatter) -> io::Result<String> {
352 serde_yaml::to_string(frontmatter).map_err(|error| {
353 io::Error::new(
354 io::ErrorKind::InvalidData,
355 format!("failed to serialize memory frontmatter: {error}"),
356 )
357 })
358}
359
360pub fn parse_markdown_document(content: &str) -> io::Result<(DurableMemoryFrontmatter, String)> {
361 let trimmed = content.trim_start_matches('\u{feff}');
362 let Some(rest) = trimmed.strip_prefix("---\n") else {
363 return Err(io::Error::new(
364 io::ErrorKind::InvalidData,
365 "missing frontmatter start marker",
366 ));
367 };
368 let Some(end_idx) = rest.find("\n---\n") else {
369 return Err(io::Error::new(
370 io::ErrorKind::InvalidData,
371 "missing frontmatter end marker",
372 ));
373 };
374 let yaml = &rest[..end_idx];
375 let body = &rest[end_idx + "\n---\n".len()..];
376 let frontmatter: DurableMemoryFrontmatter = serde_yaml::from_str(yaml).map_err(|error| {
377 io::Error::new(
378 io::ErrorKind::InvalidData,
379 format!("failed to parse memory frontmatter: {error}"),
380 )
381 })?;
382 Ok((frontmatter, body.trim().to_string()))
383}
384
385pub fn render_markdown_document(
386 frontmatter: &DurableMemoryFrontmatter,
387 body: &str,
388) -> io::Result<String> {
389 let yaml = build_yaml_frontmatter(frontmatter)?;
390 Ok(format!("---\n{}---\n\n{}\n", yaml, body.trim()))
391}
392
/// Serializable container holding a generation timestamp and a list of
/// [`LexicalIndexItem`] entries. `Default` yields an empty timestamp and list.
// NOTE(review): presumably persisted as `LEXICAL_INDEX_FILE` — confirm in the store module.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LexicalIndex {
 /// When the index was produced, as a string (presumably RFC 3339 — not enforced here).
 pub generated_at: String,
 /// The indexed entries.
 pub items: Vec<LexicalIndexItem>,
}
398
/// One entry of a [`LexicalIndex`]: identifying and descriptive fields of a
/// single memory, all stored as plain serializable values.
// NOTE(review): how these fields are filled from a memory document is not
// visible in this file — confirm against the index builder.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LexicalIndexItem {
 /// Memory identifier.
 pub id: String,
 /// Memory title.
 pub title: String,
 /// Scope the memory belongs to.
 pub scope: MemoryScope,
 /// Project key, when one applies (presumably only for project-scoped memories).
 pub project_key: Option<String>,
 /// Memory type.
 pub r#type: DurableMemoryType,
 /// Memory status.
 pub status: DurableMemoryStatus,
 /// Tags attached to the memory.
 pub tags: Vec<String>,
 /// Keywords associated with the memory.
 pub keywords: Vec<String>,
 /// Entities associated with the memory.
 pub entities: Vec<String>,
 /// Last-update timestamp as a string.
 pub updated_at: String,
 /// Creation timestamp as a string.
 pub created_at: String,
 /// Short summary text.
 pub summary: String,
}
414
/// Serializable container holding a generation timestamp and a list of
/// [`RecentIndexItem`] entries. `Default` yields an empty timestamp and list.
// NOTE(review): presumably persisted as `RECENT_INDEX_FILE` — confirm in the store module.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RecentIndex {
 /// When the index was produced, as a string (presumably RFC 3339 — not enforced here).
 pub generated_at: String,
 /// The indexed entries.
 pub items: Vec<RecentIndexItem>,
}
420
/// One entry of a [`RecentIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecentIndexItem {
 /// Memory identifier.
 pub id: String,
 /// Memory title.
 pub title: String,
 /// Last-update timestamp as a string.
 pub updated_at: String,
 /// Last-access timestamp as a string, if one is recorded.
 pub last_accessed_at: Option<String>,
 /// Memory status.
 pub status: DurableMemoryStatus,
}
429
/// Serializable container holding a generation timestamp and a list of
/// [`GraphIndexItem`] entries. `Default` yields an empty timestamp and list.
// NOTE(review): presumably persisted as `GRAPH_INDEX_FILE` — confirm in the store module.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct GraphIndex {
 /// When the index was produced, as a string (presumably RFC 3339 — not enforced here).
 pub generated_at: String,
 /// The indexed entries.
 pub items: Vec<GraphIndexItem>,
}
435
/// One entry of a [`GraphIndex`]: a memory id plus three lists of strings
/// describing its relations.
// NOTE(review): the lists presumably hold ids of other memories — confirm
// against the code that fills them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphIndexItem {
 /// Identifier of the memory this entry describes.
 pub id: String,
 /// Related memories.
 pub related: Vec<String>,
 /// Memories this one supersedes.
 pub supersedes: Vec<String>,
 /// Memories that contradict this one.
 pub contradicted_by: Vec<String>,
}
443
/// Serializable container holding a generation timestamp and a list of
/// [`StaleCandidateItem`] entries. `Default` yields an empty timestamp and list.
// NOTE(review): presumably persisted as `STALE_CANDIDATES_INDEX_FILE` — confirm in the store module.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StaleCandidatesIndex {
 /// When the index was produced, as a string (presumably RFC 3339 — not enforced here).
 pub generated_at: String,
 /// The candidate entries.
 pub items: Vec<StaleCandidateItem>,
}
449
/// One entry of a [`StaleCandidatesIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StaleCandidateItem {
 /// Memory identifier.
 pub id: String,
 /// Memory title.
 pub title: String,
 /// Memory status.
 pub status: DurableMemoryStatus,
 /// Last-update timestamp as a string.
 pub updated_at: String,
 /// Free-text explanation of why the memory is listed as a candidate.
 pub reason: String,
}
458
/// Serializable container of counts keyed by string, plus an overall total.
/// `Default` yields an empty timestamp, empty maps and a total of zero.
// NOTE(review): presumably persisted as `TAXONOMY_INDEX_FILE`, with map keys
// being the `as_str()` names of type/status/scope — confirm in the store module.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TaxonomyIndex {
 /// When the index was produced, as a string (presumably RFC 3339 — not enforced here).
 pub generated_at: String,
 /// Count per memory type; `BTreeMap` keeps keys in sorted order.
 pub by_type: BTreeMap<String, usize>,
 /// Count per memory status.
 pub by_status: BTreeMap<String, usize>,
 /// Count per memory scope.
 pub by_scope: BTreeMap<String, usize>,
 /// Overall count.
 pub total: usize,
}
467
/// One serializable audit record.
// NOTE(review): presumably written as one JSON object per line to the
// `*_AUDIT_LOG` files — the writer is not visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditLogEntry {
 /// When the action happened, as a string.
 pub timestamp: String,
 /// Name of the audited action.
 pub action: String,
 /// Scope the action applied to.
 pub scope: MemoryScope,
 /// Affected memory id, if any.
 pub memory_id: Option<String>,
 /// Related session id, if any.
 pub session_id: Option<String>,
 /// Related topic, if any.
 pub topic: Option<String>,
 /// Human-readable description of the action.
 pub summary: String,
 /// Optional free-form JSON payload. Omitted from the serialized output when
 /// `None`, and read back as `None` when the field is absent.
 #[serde(default, skip_serializing_if = "Option::is_none")]
 pub metadata: Option<serde_json::Value>,
}
480
481pub fn parse_rfc3339(value: &str) -> Option<DateTime<Utc>> {
482 chrono::DateTime::parse_from_rfc3339(value)
483 .ok()
484 .map(|dt| dt.with_timezone(&Utc))
485}
486
487pub fn sort_memories_desc(memories: &mut [DurableMemoryDocument]) {
488 memories.sort_by(|left, right| {
489 let left_dt =
490 parse_rfc3339(&left.frontmatter.updated_at).unwrap_or(DateTime::<Utc>::MIN_UTC);
491 let right_dt =
492 parse_rfc3339(&right.frontmatter.updated_at).unwrap_or(DateTime::<Utc>::MIN_UTC);
493 right_dt
494 .cmp(&left_dt)
495 .then_with(|| left.frontmatter.id.cmp(&right.frontmatter.id))
496 });
497}
498
499pub fn match_memory_query(
500 doc: &DurableMemoryDocument,
501 query: Option<&str>,
502 filter_types: Option<&HashSet<DurableMemoryType>>,
503 filter_statuses: Option<&HashSet<DurableMemoryStatus>>,
504) -> Option<f64> {
505 if let Some(types) = filter_types {
506 if !types.contains(&doc.frontmatter.r#type) {
507 return None;
508 }
509 }
510 if let Some(statuses) = filter_statuses {
511 if !statuses.contains(&doc.frontmatter.status) {
512 return None;
513 }
514 }
515
516 let Some(query) = query.map(str::trim).filter(|value| !value.is_empty()) else {
517 return Some(1.0);
518 };
519
520 let query_tokens = extract_keywords(query, "", &[]);
521 if query_tokens.is_empty() {
522 return Some(1.0);
523 }
524
525 let title = doc.frontmatter.title.to_ascii_lowercase();
526 let body = doc.body.to_ascii_lowercase();
527 let keywords: HashSet<String> = doc
528 .frontmatter
529 .retrieval
530 .keywords
531 .iter()
532 .map(|value| value.to_ascii_lowercase())
533 .collect();
534 let tags: HashSet<String> = doc
535 .frontmatter
536 .tags
537 .iter()
538 .map(|value| value.to_ascii_lowercase())
539 .collect();
540 let entities: HashSet<String> = doc
541 .frontmatter
542 .retrieval
543 .entities
544 .iter()
545 .map(|value| value.to_ascii_lowercase())
546 .collect();
547
548 let mut score = 0.0;
549 let mut matched_any = false;
550 for token in &query_tokens {
551 let mut token_score = 0.0;
552 if title.contains(token) {
553 token_score += 3.0;
554 }
555 if keywords.contains(token) {
556 token_score += 2.5;
557 }
558 if tags.contains(token) {
559 token_score += 2.0;
560 }
561 if entities.contains(token) {
562 token_score += 1.5;
563 }
564 if body.contains(token) {
565 token_score += 1.0;
566 }
567 if token_score > 0.0 {
568 matched_any = true;
569 score += token_score;
570 }
571 }
572
573 matched_any.then_some(score / query_tokens.len() as f64)
574}
575
576pub fn build_memory_markdown_view(
577 scope: MemoryScope,
578 project_key: Option<&str>,
579 docs: &[DurableMemoryDocument],
580) -> String {
581 let title = match scope {
582 MemoryScope::Global => "# Bamboo Memory Index (Global)".to_string(),
583 MemoryScope::Project => format!(
584 "# Bamboo Memory Index (Project: {})",
585 project_key.unwrap_or("unknown")
586 ),
587 MemoryScope::Session => "# Bamboo Memory Index (Session)".to_string(),
588 };
589 let mut out = String::new();
590 out.push_str(&title);
591 out.push_str("\n\n");
592 if docs.is_empty() {
593 out.push_str("_(empty)_\n");
594 return out;
595 }
596
597 for doc in docs {
598 out.push_str(&format!(
599 "- `{}` {} [{} / {}] updated {}\n",
600 doc.frontmatter.id,
601 doc.frontmatter.title,
602 doc.frontmatter.r#type.as_str(),
603 doc.frontmatter.status.as_str(),
604 doc.frontmatter.updated_at,
605 ));
606 let summary = derive_summary(&doc.body, 160);
607 if !summary.is_empty() {
608 out.push_str(&format!(" - {}\n", summary));
609 }
610 }
611 out
612}
613
614pub fn build_recent_markdown_view(docs: &[DurableMemoryDocument]) -> String {
615 let mut out = String::from("# Recent Memory Updates\n\n");
616 if docs.is_empty() {
617 out.push_str("_(empty)_\n");
618 return out;
619 }
620 for doc in docs.iter().take(20) {
621 out.push_str(&format!(
622 "- `{}` {} — {}\n",
623 doc.frontmatter.id, doc.frontmatter.title, doc.frontmatter.updated_at
624 ));
625 }
626 out
627}
628
629pub fn build_stale_markdown_view(docs: &[DurableMemoryDocument]) -> String {
630 let mut out = String::from("# Stale Memory Candidates\n\n");
631 let stale: Vec<_> = docs
632 .iter()
633 .filter(|doc| doc.frontmatter.status != DurableMemoryStatus::Active)
634 .collect();
635 if stale.is_empty() {
636 out.push_str("_(no stale items)_\n");
637 return out;
638 }
639 for doc in stale {
640 out.push_str(&format!(
641 "- `{}` {} [{}]\n",
642 doc.frontmatter.id,
643 doc.frontmatter.title,
644 doc.frontmatter.status.as_str()
645 ));
646 }
647 out
648}
649
/// Returns the dream notebook text, or a placeholder notebook when there is none.
///
/// Existing content is returned with surrounding whitespace removed. When
/// `existing` is `None` or blank, a default document with a heading and an
/// `_(empty)_` line is returned.
pub fn build_dream_view(existing: Option<&str>) -> String {
    let kept = existing.unwrap_or("").trim();
    if kept.is_empty() {
        "# Bamboo Dream Notebook\n\n_(empty)_\n".to_string()
    } else {
        kept.to_string()
    }
}
656
/// Extracts the numeric offset from a query cursor.
///
/// The offset is whatever follows the last `:` in the cursor, or the whole
/// cursor when it contains no `:`. Returns `0` when the cursor is `None` or
/// that part does not parse as a `usize`.
pub fn parse_query_cursor(cursor: Option<&str>) -> usize {
    let Some(raw) = cursor else {
        return 0;
    };

    let tail = match raw.rfind(':') {
        Some(colon) => &raw[colon + 1..],
        None => raw,
    };
    tail.parse().unwrap_or(0)
}
663
664pub fn make_query_cursor(scope: MemoryScope, offset: usize) -> String {
665 format!("{}:{}", scope.as_str(), offset)
666}
667
/// Builds a one-sentence description of a query result.
///
/// Despite the name, the return value is plain text, not JSON. When `total`
/// is zero the "no matches" sentence is returned whatever `items` is.
pub fn summary_json(items: usize, total: usize) -> String {
    match total {
        0 => String::from("No matching memories found."),
        _ => format!("Returned top {} of {} matching memories.", items, total),
    }
}
675
// Unit tests for a few of the helpers defined in this module.
#[cfg(test)]
mod tests {
 use super::*;

 // "User Preference" and "user-preference" normalize to the same tag, so only
 // two tags remain; the result comes back in ascending string order.
 #[test]
 fn normalize_tags_dedupes_and_sanitizes() {
 let tags = normalize_tags(["User Preference", "user-preference", "release/freeze"]);
 assert_eq!(tags, vec!["release-freeze", "user-preference"]);
 }

 // Only the sanitized directory-name prefix of the key is asserted; the hash
 // suffix is not compared across calls.
 // NOTE(review): this assumes no ancestor of `/tmp/My Project` contains a
 // `.git` entry, otherwise the key is derived from that ancestor instead.
 #[test]
 fn project_key_from_path_is_stable() {
 let key = project_key_from_path(Path::new("/tmp/My Project"));
 assert!(key.starts_with("my-project-"));
 }

 // Input without a leading `---` fence must be rejected.
 #[test]
 fn parse_markdown_document_requires_frontmatter() {
 let result = parse_markdown_document("plain body");
 assert!(result.is_err());
 }
}