1use crate::core::content_chunk::ContentChunk;
17use crate::core::cross_source_edges;
18use crate::core::graph_index::IndexEdge;
19use crate::core::knowledge_provider_extract::{self, ExtractedFact};
20
/// Tally of what a consolidation pass produced or applied.
///
/// Every counter starts at zero via `Default`. The type is comparable with
/// `==` so callers and tests can assert on a whole result at once.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ConsolidationResult {
    /// Number of chunks counted as indexed.
    pub chunks_indexed: usize,
    /// Number of graph edges counted as created.
    pub edges_created: usize,
    /// Number of facts counted as extracted.
    pub facts_extracted: usize,
    /// Number of cache entries counted as stored.
    pub cache_entries_stored: usize,
}
29
30pub fn consolidate(chunks: &[ContentChunk]) -> ConsolidationArtifacts {
36 let external_chunks: Vec<&ContentChunk> = chunks.iter().filter(|c| c.is_external()).collect();
37
38 if external_chunks.is_empty() {
39 return ConsolidationArtifacts::default();
40 }
41
42 let edges = cross_source_edges::extract_cross_source_edges(chunks);
43
44 let facts = knowledge_provider_extract::extract_facts(chunks);
45
46 let cache_entries: Vec<CacheableProviderResult> = external_chunks
47 .iter()
48 .map(|c| CacheableProviderResult {
49 uri: c.file_path.clone(),
50 content: c.content.clone(),
51 token_count: c.token_count,
52 })
53 .collect();
54
55 ConsolidationArtifacts {
56 bm25_chunks: chunks.to_vec(),
57 edges,
58 facts,
59 cache_entries,
60 }
61}
62
63#[derive(Debug, Clone, Default)]
65pub struct ConsolidationArtifacts {
66 pub bm25_chunks: Vec<ContentChunk>,
67 pub edges: Vec<IndexEdge>,
68 pub facts: Vec<ExtractedFact>,
69 pub cache_entries: Vec<CacheableProviderResult>,
70}
71
72impl ConsolidationArtifacts {
73 pub fn is_empty(&self) -> bool {
74 self.bm25_chunks.is_empty()
75 && self.edges.is_empty()
76 && self.facts.is_empty()
77 && self.cache_entries.is_empty()
78 }
79
80 pub fn summary(&self) -> ConsolidationResult {
81 ConsolidationResult {
82 chunks_indexed: self.bm25_chunks.iter().filter(|c| c.is_external()).count(),
83 edges_created: self.edges.len(),
84 facts_extracted: self.facts.len(),
85 cache_entries_stored: self.cache_entries.len(),
86 }
87 }
88}
89
/// A single provider result in a form ready to be placed in the session cache.
///
/// Comparable with `==` so entries can be asserted on or de-duplicated by value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheableProviderResult {
    /// Key under which the content is cached.
    pub uri: String,
    /// The content to cache.
    pub content: String,
    /// Token count recorded for the content.
    pub token_count: usize,
}
97
98pub fn apply_artifacts(
104 artifacts: &ConsolidationArtifacts,
105 bm25: Option<&mut crate::core::bm25_index::BM25Index>,
106 graph_edges: Option<&mut Vec<IndexEdge>>,
107 session_cache: Option<&mut crate::core::cache::SessionCache>,
108) -> ConsolidationResult {
109 let mut result = ConsolidationResult::default();
110
111 if let Some(index) = bm25 {
112 result.chunks_indexed = index.ingest_content_chunks(artifacts.bm25_chunks.clone());
113 }
114
115 if let Some(edges) = graph_edges {
116 result.edges_created = cross_source_edges::merge_edges(edges, artifacts.edges.clone());
117 }
118
119 result.facts_extracted = artifacts.facts.len();
120
121 if let Some(cache) = session_cache {
122 for entry in &artifacts.cache_entries {
123 cache.store(&entry.uri, &entry.content);
124 result.cache_entries_stored += 1;
125 }
126 }
127
128 result
129}
130
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::bm25_index::{BM25Index, ChunkKind};
    use crate::core::cache::SessionCache;
    use crate::core::content_chunk::ContentChunk;

    /// Two provider chunks (an issue and a pull request) that both mention
    /// `src/auth.rs`.
    fn sample_chunks() -> Vec<ContentChunk> {
        let issue = ContentChunk::from_provider(
            "github",
            "issues",
            "42",
            "Auth token bug",
            ChunkKind::Issue,
            "Token expires too early in src/auth.rs".into(),
            vec!["src/auth.rs".into()],
            Some(serde_json::json!({"state": "open", "labels": ["bug"]})),
        );
        let pull_request = ContentChunk::from_provider(
            "github",
            "pull_requests",
            "100",
            "Fix auth expiry",
            ChunkKind::PullRequest,
            "Fixes token lifetime calculation in src/auth.rs".into(),
            vec!["src/auth.rs".into()],
            Some(serde_json::json!({"state": "open"})),
        );

        vec![issue, pull_request]
    }

    /// A BM25 index with no documents, built field by field.
    fn empty_index() -> BM25Index {
        BM25Index {
            chunks: Vec::new(),
            inverted: std::collections::HashMap::new(),
            avg_doc_len: 0.0,
            doc_count: 0,
            doc_freqs: std::collections::HashMap::new(),
            files: std::collections::HashMap::new(),
        }
    }

    #[test]
    fn consolidate_produces_all_artifact_types() {
        let input = sample_chunks();
        let produced = consolidate(&input);

        assert!(!produced.is_empty());
        assert_eq!(produced.bm25_chunks.len(), 2);
        assert!(!produced.edges.is_empty());
        assert!(!produced.facts.is_empty());
        assert_eq!(produced.cache_entries.len(), 2);
    }

    #[test]
    fn consolidate_empty_input_produces_empty_artifacts() {
        let produced = consolidate(&[]);
        assert!(produced.is_empty());
    }

    #[test]
    fn consolidate_code_only_produces_empty_external_artifacts() {
        let code_chunk = ContentChunk::from(crate::core::bm25_index::CodeChunk {
            file_path: "src/main.rs".into(),
            symbol_name: "main".into(),
            kind: ChunkKind::Function,
            start_line: 1,
            end_line: 5,
            content: "fn main() {}".into(),
            tokens: vec![],
            token_count: 0,
        });

        let produced = consolidate(&[code_chunk]);
        assert!(produced.edges.is_empty());
        assert!(produced.facts.is_empty());
        assert!(produced.cache_entries.is_empty());
    }

    #[test]
    fn consolidation_summary_counts_correctly() {
        let input = sample_chunks();
        let counts = consolidate(&input).summary();

        assert_eq!(counts.chunks_indexed, 2);
        assert!(counts.edges_created > 0);
        assert!(counts.facts_extracted > 0);
        assert_eq!(counts.cache_entries_stored, 2);
    }

    #[test]
    fn apply_artifacts_to_bm25() {
        let input = sample_chunks();
        let produced = consolidate(&input);
        let mut index = empty_index();

        let outcome = apply_artifacts(&produced, Some(&mut index), None, None);
        assert_eq!(outcome.chunks_indexed, 2);
        assert_eq!(index.doc_count, 2);
        assert_eq!(index.external_chunk_count(), 2);
    }

    #[test]
    fn apply_artifacts_to_graph() {
        let input = sample_chunks();
        let produced = consolidate(&input);
        let mut graph: Vec<IndexEdge> = Vec::new();

        let outcome = apply_artifacts(&produced, None, Some(&mut graph), None);

        assert!(outcome.edges_created > 0);
        assert!(!graph.is_empty());
        assert!(graph.iter().any(|edge| edge.to == "src/auth.rs"));
    }

    #[test]
    fn apply_artifacts_to_session_cache() {
        let input = sample_chunks();
        let produced = consolidate(&input);
        let mut session = SessionCache::new();

        let outcome = apply_artifacts(&produced, None, None, Some(&mut session));

        assert_eq!(outcome.cache_entries_stored, 2);
        assert!(session.get("github://issues/42").is_some());
        assert!(session.get("github://pull_requests/100").is_some());
    }

    #[test]
    fn apply_artifacts_to_all_systems() {
        let input = sample_chunks();
        let produced = consolidate(&input);

        let mut index = empty_index();
        let mut graph: Vec<IndexEdge> = Vec::new();
        let mut session = SessionCache::new();

        let outcome = apply_artifacts(
            &produced,
            Some(&mut index),
            Some(&mut graph),
            Some(&mut session),
        );

        assert!(outcome.chunks_indexed > 0);
        assert!(outcome.edges_created > 0);
        assert!(outcome.facts_extracted > 0);
        assert!(outcome.cache_entries_stored > 0);
    }
}
285}