use crate::memory_core::palace::Drawer;
use crate::memory_core::retrieval::{PalaceHandle, shared_embedder};
use crate::memory_core::store::vector::VectorStore;
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use uuid::Uuid;
/// Substrings that mark a piece of content as low quality.
///
/// `is_low_quality_content` rejects any content that contains one of these
/// entries. The match is a plain, case-sensitive substring test, so the exact
/// spelling and spacing of each entry (including the trailing space in the
/// first one) is significant.
pub(crate) const CONTENT_BLOCKLIST: &[&str] = &[
"Tool use: ", "Claude Code session", ];
/// Words that `extract_keywords` never emits as keywords.
///
/// Entries are lowercase because they are compared for exact equality against
/// tokens that have already been lowercased and stripped of non-alphanumeric
/// characters. Entries shorter than three bytes ("a", "an", "is", ...) are also
/// rejected by `extract_keywords`' minimum-length check before this list is
/// consulted.
pub(crate) const STOP_WORDS: &[&str] = &[
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "of", "in", "on", "at",
"to", "for", "with", "and", "or", "but", "not", "no", "yes", "i", "you", "he", "she", "it",
"we", "they", "this", "that", "these", "those", "as", "by", "from", "into", "over", "under",
"if", "then", "than", "so", "do", "does", "did", "have", "has", "had", "will", "would",
"shall", "should", "can", "could", "may", "might", "must", "about", "any", "all", "some",
"more", "most", "such",
];
/// Extracts the distinct keywords of `content`, in order of first appearance.
///
/// Each whitespace-separated word is normalised by dropping every
/// non-alphanumeric character and lowercasing what remains. A normalised token
/// is kept when it is at least three bytes long, is not listed in
/// [`STOP_WORDS`], and has not been produced by an earlier word.
///
/// Note that the length threshold is measured in UTF-8 bytes (`String::len`),
/// not in characters, so short non-ASCII tokens can pass it.
pub fn extract_keywords(content: &str) -> Vec<String> {
    // Tracks tokens already emitted so each keyword appears only once.
    let mut emitted = std::collections::HashSet::new();

    content
        .split_whitespace()
        .map(|word| {
            word.chars()
                .filter(|ch| ch.is_alphanumeric())
                .flat_map(char::to_lowercase)
                .collect::<String>()
        })
        .filter(|token| token.len() >= 3 && !STOP_WORDS.contains(&token.as_str()))
        // `insert` returns false for a repeat, which filters the duplicate out.
        .filter(|token| emitted.insert(token.clone()))
        .collect()
}
/// Reports whether `content` should be rejected as low quality.
///
/// Content is low quality when, after leading whitespace is removed, it
/// contains any entry of [`CONTENT_BLOCKLIST`], or when it has fewer than
/// `min_words` whitespace-separated words.
pub(crate) fn is_low_quality_content(content: &str, min_words: usize) -> bool {
    let body = content.trim_start();
    let blocklisted = CONTENT_BLOCKLIST
        .iter()
        .any(|marker| body.contains(marker));

    // The word count is taken over the full, untrimmed content.
    blocklisted || content.split_whitespace().count() < min_words
}
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, `0` is returned
/// instead of an error.
pub(crate) fn now_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
/// Folds `loser` into the stored drawer whose id matches `survivor.id`.
///
/// Under the palace's drawer write lock, the stored survivor is updated in
/// place:
///
/// * its content becomes `"<survivor>\n\nAlso: <loser>"`, capped at 500 bytes;
/// * its importance becomes the larger of the two importances;
/// * every tag of `loser` that it does not already carry is appended.
///
/// If no stored drawer has `survivor.id`, nothing is changed. The loser is not
/// removed from the drawer list by this function.
pub(crate) fn merge_into(handle: &Arc<PalaceHandle>, survivor: &Drawer, loser: &Drawer) {
    /// Upper bound, in bytes, on the merged content kept in the survivor.
    const MAX_MERGED_BYTES: usize = 500;

    let mut drawers = handle.drawers.write();
    if let Some(target) = drawers.iter_mut().find(|d| d.id == survivor.id) {
        target.content.push_str("\n\nAlso: ");
        target.content.push_str(&loser.content);

        if target.content.len() > MAX_MERGED_BYTES {
            // `String::truncate` panics when the cut falls inside a multi-byte
            // UTF-8 character, so back off to the nearest char boundary at or
            // below the limit. Index 0 is always a boundary, so this terminates.
            let mut cut = MAX_MERGED_BYTES;
            while !target.content.is_char_boundary(cut) {
                cut -= 1;
            }
            target.content.truncate(cut);
        }

        target.importance = target.importance.max(loser.importance);

        for tag in &loser.tags {
            if !target.tags.contains(tag) {
                target.tags.push(tag.clone());
            }
        }
    }
}
/// Rebuilds the palace's vector index from its current drawers.
///
/// A snapshot of the drawers is cloned out of the read lock, the vector store
/// is reset, and each drawer's content is then re-embedded and upserted one at
/// a time. Before each drawer the elapsed time since `started` is compared
/// with `budget`; once the budget is used up the loop stops and the remaining
/// drawers are left out of the index.
///
/// Returns the number of drawers that were re-upserted.
///
/// # Errors
///
/// Fails if the store cannot be reset, the shared embedder cannot be acquired,
/// or embedding / upserting any drawer fails. Drawers processed before the
/// failure stay in the index.
///
/// NOTE(review): the index is reset before the embedder is acquired, so an
/// embedder failure returns with the index in whatever state `reset()` left
/// it. Confirm that is intended.
pub(crate) async fn rebuild_index_from_drawers(
    handle: &Arc<PalaceHandle>,
    started: std::time::Instant,
    budget: Duration,
) -> Result<usize> {
    // Clone inside a single statement so the read guard is released before
    // any `.await` below.
    let drawers: Vec<Drawer> = handle.drawers.read().clone();

    handle
        .vector_store
        .reset()
        .context("reset vector index for rebuild")?;

    if drawers.is_empty() {
        return Ok(0);
    }

    let embedder = shared_embedder()
        .await
        .context("acquire shared embedder for dream rebuild")?;

    let mut count: usize = 0;
    for drawer in &drawers {
        if started.elapsed() >= budget {
            break;
        }

        // One-element batch: only the first returned vector is used.
        let embedding = embedder
            .embed_batch(std::slice::from_ref(&drawer.content))
            .await
            .with_context(|| format!("re-embed drawer {}", drawer.id))?
            .into_iter()
            .next();

        if let Some(vector) = embedding {
            handle
                .vector_store
                .upsert(drawer.id, vector)
                .await
                .with_context(|| format!("re-upsert drawer {}", drawer.id))?;
            count += 1;
        }
    }

    Ok(count)
}
/// Builds a keyword index over `drawers`.
///
/// Each keyword returned by `extract_keywords` for a drawer's content maps to
/// the ids of the drawers containing it. Ids appear in the order the drawers
/// are given, and a drawer is listed at most once per keyword because
/// `extract_keywords` returns distinct keywords.
pub(crate) fn build_closet_index(drawers: &[Drawer]) -> HashMap<String, Vec<Uuid>> {
    // Flatten to (keyword, drawer id) pairs, then group the ids by keyword.
    let postings = drawers.iter().flat_map(|drawer| {
        extract_keywords(&drawer.content)
            .into_iter()
            .map(move |keyword| (keyword, drawer.id))
    });

    let mut index: HashMap<String, Vec<Uuid>> = HashMap::new();
    for (keyword, id) in postings {
        index.entry(keyword).or_default().push(id);
    }
    index
}