Skip to main content

lean_ctx/core/
consolidation.rs

1//! Consolidation engine for provider data — hippocampal sleep replay.
2//!
3//! Converts provider results into long-term context artifacts:
4//!   1. BM25/embedding index chunks (for future searches)
5//!   2. Cross-source graph edges (for related-file discovery)
6//!   3. Knowledge facts (for semantic memory)
7//!   4. Session cache entries (for fast re-reads at ~13 tokens)
8//!
9//! This is the "sleep replay" mechanism: raw episodic data (provider API
10//! responses) is consolidated into durable semantic representations.
11//!
12//! Scientific basis: Hippocampal memory consolidation (Kitamura, Science 2017).
13//! Fast hippocampal (session cache) traces are replayed to build slow
14//! neocortical (knowledge + graph + index) representations.
15
16use crate::core::content_chunk::ContentChunk;
17use crate::core::cross_source_edges;
18use crate::core::graph_index::IndexEdge;
19use crate::core::knowledge_provider_extract::{self, ExtractedFact};
20
/// Counters describing what a consolidation run produced or applied.
///
/// Returned by [`ConsolidationArtifacts::summary`] (counts derived from the
/// artifacts alone) and by [`apply_artifacts`] (counts reported while writing
/// to the live stores). All counters default to zero.
///
/// Implements `PartialEq`/`Eq` so callers and tests can compare whole results
/// with `assert_eq!` instead of checking each field separately.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ConsolidationResult {
    /// Number of chunks counted as indexed.
    pub chunks_indexed: usize,
    /// Number of graph edges counted as created.
    pub edges_created: usize,
    /// Number of knowledge facts extracted.
    pub facts_extracted: usize,
    /// Number of entries stored in the session cache.
    pub cache_entries_stored: usize,
}
29
30/// Consolidate a batch of ContentChunks into all long-term stores.
31///
32/// This is the main entry point. It does NOT perform I/O itself — it returns
33/// the artifacts that the caller should persist. This keeps the consolidation
34/// logic pure and testable.
35pub fn consolidate(chunks: &[ContentChunk]) -> ConsolidationArtifacts {
36    let external_chunks: Vec<&ContentChunk> = chunks.iter().filter(|c| c.is_external()).collect();
37
38    if external_chunks.is_empty() {
39        return ConsolidationArtifacts::default();
40    }
41
42    let edges = cross_source_edges::extract_cross_source_edges(chunks);
43
44    let facts = knowledge_provider_extract::extract_facts(chunks);
45
46    let cache_entries: Vec<CacheableProviderResult> = external_chunks
47        .iter()
48        .map(|c| CacheableProviderResult {
49            uri: c.file_path.clone(),
50            content: c.content.clone(),
51            token_count: c.token_count,
52        })
53        .collect();
54
55    ConsolidationArtifacts {
56        bm25_chunks: chunks.to_vec(),
57        edges,
58        facts,
59        cache_entries,
60    }
61}
62
63/// Pure artifacts produced by consolidation — no side effects yet.
64#[derive(Debug, Clone, Default)]
65pub struct ConsolidationArtifacts {
66    pub bm25_chunks: Vec<ContentChunk>,
67    pub edges: Vec<IndexEdge>,
68    pub facts: Vec<ExtractedFact>,
69    pub cache_entries: Vec<CacheableProviderResult>,
70}
71
72impl ConsolidationArtifacts {
73    pub fn is_empty(&self) -> bool {
74        self.bm25_chunks.is_empty()
75            && self.edges.is_empty()
76            && self.facts.is_empty()
77            && self.cache_entries.is_empty()
78    }
79
80    pub fn summary(&self) -> ConsolidationResult {
81        ConsolidationResult {
82            chunks_indexed: self.bm25_chunks.iter().filter(|c| c.is_external()).count(),
83            edges_created: self.edges.len(),
84            facts_extracted: self.facts.len(),
85            cache_entries_stored: self.cache_entries.len(),
86        }
87    }
88}
89
/// A provider result ready to be stored in the session cache.
///
/// Implements `PartialEq`/`Eq` so entries can be compared directly (for
/// example with `assert_eq!` in tests or when de-duplicating).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheableProviderResult {
    /// Key under which the content is stored in the session cache.
    pub uri: String,
    /// Content to cache.
    pub content: String,
    /// Token count carried over from the source chunk.
    pub token_count: usize,
}
97
98/// Apply consolidation artifacts to the live systems.
99///
100/// This function performs the actual side effects: writing to BM25, graph,
101/// knowledge, and session cache. Designed to be called from a background
102/// thread or after a provider query returns.
103pub fn apply_artifacts(
104    artifacts: &ConsolidationArtifacts,
105    bm25: Option<&mut crate::core::bm25_index::BM25Index>,
106    graph_edges: Option<&mut Vec<IndexEdge>>,
107    session_cache: Option<&mut crate::core::cache::SessionCache>,
108) -> ConsolidationResult {
109    let mut result = ConsolidationResult::default();
110
111    if let Some(index) = bm25 {
112        result.chunks_indexed = index.ingest_content_chunks(artifacts.bm25_chunks.clone());
113    }
114
115    if let Some(edges) = graph_edges {
116        result.edges_created = cross_source_edges::merge_edges(edges, artifacts.edges.clone());
117    }
118
119    result.facts_extracted = artifacts.facts.len();
120
121    if let Some(cache) = session_cache {
122        for entry in &artifacts.cache_entries {
123            cache.store(&entry.uri, &entry.content);
124            result.cache_entries_stored += 1;
125        }
126    }
127
128    result
129}
130
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::bm25_index::{BM25Index, ChunkKind};
    use crate::core::cache::SessionCache;
    use crate::core::content_chunk::ContentChunk;

    /// Two GitHub provider chunks (an issue and a pull request) that both
    /// reference `src/auth.rs`.
    fn sample_chunks() -> Vec<ContentChunk> {
        let issue = ContentChunk::from_provider(
            "github",
            "issues",
            "42",
            "Auth token bug",
            ChunkKind::Issue,
            "Token expires too early in src/auth.rs".into(),
            vec!["src/auth.rs".into()],
            Some(serde_json::json!({"state": "open", "labels": ["bug"]})),
        );
        let pull_request = ContentChunk::from_provider(
            "github",
            "pull_requests",
            "100",
            "Fix auth expiry",
            ChunkKind::PullRequest,
            "Fixes token lifetime calculation in src/auth.rs".into(),
            vec!["src/auth.rs".into()],
            Some(serde_json::json!({"state": "open"})),
        );
        vec![issue, pull_request]
    }

    /// A BM25 index with no documents, shared by the apply tests.
    fn empty_index() -> BM25Index {
        BM25Index {
            chunks: Vec::new(),
            inverted: std::collections::HashMap::new(),
            avg_doc_len: 0.0,
            doc_count: 0,
            doc_freqs: std::collections::HashMap::new(),
            files: std::collections::HashMap::new(),
        }
    }

    #[test]
    fn consolidate_produces_all_artifact_types() {
        let produced = consolidate(&sample_chunks());

        assert!(!produced.is_empty());
        assert_eq!(produced.bm25_chunks.len(), 2);
        assert!(!produced.edges.is_empty());
        assert!(!produced.facts.is_empty());
        assert_eq!(produced.cache_entries.len(), 2);
    }

    #[test]
    fn consolidate_empty_input_produces_empty_artifacts() {
        assert!(consolidate(&[]).is_empty());
    }

    #[test]
    fn consolidate_code_only_produces_empty_external_artifacts() {
        let code_chunk = ContentChunk::from(crate::core::bm25_index::CodeChunk {
            file_path: "src/main.rs".into(),
            symbol_name: "main".into(),
            kind: ChunkKind::Function,
            start_line: 1,
            end_line: 5,
            content: "fn main() {}".into(),
            tokens: vec![],
            token_count: 0,
        });

        let produced = consolidate(&[code_chunk]);

        assert!(produced.edges.is_empty());
        assert!(produced.facts.is_empty());
        assert!(produced.cache_entries.is_empty());
    }

    #[test]
    fn consolidation_summary_counts_correctly() {
        let counts = consolidate(&sample_chunks()).summary();

        assert_eq!(counts.chunks_indexed, 2);
        assert!(counts.edges_created > 0);
        assert!(counts.facts_extracted > 0);
        assert_eq!(counts.cache_entries_stored, 2);
    }

    #[test]
    fn apply_artifacts_to_bm25() {
        let produced = consolidate(&sample_chunks());
        let mut index = empty_index();

        let applied = apply_artifacts(&produced, Some(&mut index), None, None);

        assert_eq!(applied.chunks_indexed, 2);
        assert_eq!(index.doc_count, 2);
        assert_eq!(index.external_chunk_count(), 2);
    }

    #[test]
    fn apply_artifacts_to_graph() {
        let produced = consolidate(&sample_chunks());
        let mut graph: Vec<IndexEdge> = Vec::new();

        let applied = apply_artifacts(&produced, None, Some(&mut graph), None);

        assert!(applied.edges_created > 0);
        assert!(!graph.is_empty());
        assert!(graph.iter().any(|edge| edge.to == "src/auth.rs"));
    }

    #[test]
    fn apply_artifacts_to_session_cache() {
        let produced = consolidate(&sample_chunks());
        let mut cache = SessionCache::new();

        let applied = apply_artifacts(&produced, None, None, Some(&mut cache));

        assert_eq!(applied.cache_entries_stored, 2);
        assert!(cache.get("github://issues/42").is_some());
        assert!(cache.get("github://pull_requests/100").is_some());
    }

    #[test]
    fn apply_artifacts_to_all_systems() {
        let produced = consolidate(&sample_chunks());
        let mut index = empty_index();
        let mut graph: Vec<IndexEdge> = Vec::new();
        let mut cache = SessionCache::new();

        let applied = apply_artifacts(
            &produced,
            Some(&mut index),
            Some(&mut graph),
            Some(&mut cache),
        );

        assert!(applied.chunks_indexed > 0);
        assert!(applied.edges_created > 0);
        assert!(applied.facts_extracted > 0);
        assert!(applied.cache_entries_stored > 0);
    }
}