use crate::core::content_chunk::ContentChunk;
use crate::core::cross_source_edges;
use crate::core::graph_index::IndexEdge;
use crate::core::knowledge_provider_extract::{self, ExtractedFact};
/// Counters summarising a consolidation pass.
///
/// Returned by `ConsolidationArtifacts::summary` (counts of what was derived)
/// and by `apply_artifacts` (counts of what was written to the sinks that were
/// supplied). `Default` yields all-zero counters.
#[derive(Debug, Clone, Default)]
pub struct ConsolidationResult {
/// Chunk count. `summary` reports the number of external chunks;
/// `apply_artifacts` reports whatever the BM25 ingest call returns.
pub chunks_indexed: usize,
/// Edge count. `summary` reports the number of derived edges;
/// `apply_artifacts` reports whatever the edge merge call returns.
pub edges_created: usize,
/// Number of extracted facts held in the artifacts.
pub facts_extracted: usize,
/// Number of cache entries derived (`summary`) or stored (`apply_artifacts`).
pub cache_entries_stored: usize,
}
/// Derives every artifact that can be built from a batch of chunks.
///
/// If no chunk in `chunks` reports `is_external()`, an empty (default)
/// [`ConsolidationArtifacts`] is returned and neither extractor is invoked.
/// Otherwise the result contains:
///
/// - `bm25_chunks`: a copy of *all* input chunks,
/// - `edges`: the output of `cross_source_edges::extract_cross_source_edges`,
/// - `facts`: the output of `knowledge_provider_extract::extract_facts`,
/// - `cache_entries`: one entry per external chunk, in input order.
pub fn consolidate(chunks: &[ContentChunk]) -> ConsolidationArtifacts {
    /// Copies the cache-relevant fields out of a single chunk.
    fn cache_entry_for(chunk: &ContentChunk) -> CacheableProviderResult {
        CacheableProviderResult {
            uri: chunk.file_path.clone(),
            content: chunk.content.clone(),
            token_count: chunk.token_count,
        }
    }

    let provider_chunks: Vec<&ContentChunk> = chunks
        .iter()
        .filter(|chunk| chunk.is_external())
        .collect();

    // Without any external chunk there is nothing to consolidate.
    if provider_chunks.is_empty() {
        return ConsolidationArtifacts::default();
    }

    // Both extractors receive the full slice, not just the external subset.
    let edges = cross_source_edges::extract_cross_source_edges(chunks);
    let facts = knowledge_provider_extract::extract_facts(chunks);

    let cache_entries: Vec<CacheableProviderResult> =
        provider_chunks.into_iter().map(cache_entry_for).collect();

    ConsolidationArtifacts {
        bm25_chunks: chunks.to_vec(),
        edges,
        facts,
        cache_entries,
    }
}
/// In-memory bundle of everything `consolidate` derives from a batch of chunks.
///
/// `Default` yields four empty collections, which is also what `consolidate`
/// returns when the input contains no external chunk.
#[derive(Debug, Clone, Default)]
pub struct ConsolidationArtifacts {
/// Copy of every chunk that was passed to `consolidate` (not only the
/// external ones); handed to the BM25 index by `apply_artifacts`.
pub bm25_chunks: Vec<ContentChunk>,
/// Edges returned by `cross_source_edges::extract_cross_source_edges`.
pub edges: Vec<IndexEdge>,
/// Facts returned by `knowledge_provider_extract::extract_facts`.
pub facts: Vec<ExtractedFact>,
/// One cacheable entry per external chunk, in input order.
pub cache_entries: Vec<CacheableProviderResult>,
}
impl ConsolidationArtifacts {
    /// Returns `true` only when all four artifact collections are empty.
    pub fn is_empty(&self) -> bool {
        // Exhaustive destructuring: adding a field to the struct makes this
        // fail to compile until the emptiness check is revisited.
        let Self {
            bm25_chunks,
            edges,
            facts,
            cache_entries,
        } = self;

        bm25_chunks.is_empty()
            && edges.is_empty()
            && facts.is_empty()
            && cache_entries.is_empty()
    }

    /// Builds a [`ConsolidationResult`] from the sizes of the held artifacts.
    ///
    /// `chunks_indexed` counts only the chunks that report `is_external()`,
    /// even though `bm25_chunks` may also hold non-external chunks. The other
    /// three counters are plain collection lengths.
    pub fn summary(&self) -> ConsolidationResult {
        let external_chunk_count = self
            .bm25_chunks
            .iter()
            .filter(|chunk| chunk.is_external())
            .count();

        ConsolidationResult {
            chunks_indexed: external_chunk_count,
            edges_created: self.edges.len(),
            facts_extracted: self.facts.len(),
            cache_entries_stored: self.cache_entries.len(),
        }
    }
}
/// Snapshot of one external chunk in the form used to populate the session cache.
///
/// Built by `consolidate` from the chunk's `file_path`, `content` and
/// `token_count` fields.
#[derive(Debug, Clone)]
pub struct CacheableProviderResult {
/// Cache key; copied from the source chunk's `file_path`.
pub uri: String,
/// Cached payload; copied from the source chunk's `content`.
pub content: String,
/// Token count copied from the source chunk. `apply_artifacts` does not
/// pass this value on to the cache.
pub token_count: usize,
}
pub fn apply_artifacts(
artifacts: &ConsolidationArtifacts,
bm25: Option<&mut crate::core::bm25_index::BM25Index>,
graph_edges: Option<&mut Vec<IndexEdge>>,
session_cache: Option<&mut crate::core::cache::SessionCache>,
) -> ConsolidationResult {
let mut result = ConsolidationResult::default();
if let Some(index) = bm25 {
result.chunks_indexed = index.ingest_content_chunks(artifacts.bm25_chunks.clone());
}
if let Some(edges) = graph_edges {
result.edges_created = cross_source_edges::merge_edges(edges, artifacts.edges.clone());
}
result.facts_extracted = artifacts.facts.len();
if let Some(cache) = session_cache {
for entry in &artifacts.cache_entries {
cache.store(&entry.uri, &entry.content);
result.cache_entries_stored += 1;
}
}
result
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::bm25_index::{BM25Index, ChunkKind};
    use crate::core::cache::SessionCache;
    use crate::core::content_chunk::ContentChunk;

    /// Two provider chunks (an issue and a pull request) that both reference
    /// `src/auth.rs`.
    fn sample_chunks() -> Vec<ContentChunk> {
        let issue = ContentChunk::from_provider(
            "github",
            "issues",
            "42",
            "Auth token bug",
            ChunkKind::Issue,
            "Token expires too early in src/auth.rs".into(),
            vec!["src/auth.rs".into()],
            Some(serde_json::json!({"state": "open", "labels": ["bug"]})),
        );
        let pull_request = ContentChunk::from_provider(
            "github",
            "pull_requests",
            "100",
            "Fix auth expiry",
            ChunkKind::PullRequest,
            "Fixes token lifetime calculation in src/auth.rs".into(),
            vec!["src/auth.rs".into()],
            Some(serde_json::json!({"state": "open"})),
        );
        vec![issue, pull_request]
    }

    /// Artifacts consolidated from [`sample_chunks`].
    fn sample_artifacts() -> ConsolidationArtifacts {
        let chunks = sample_chunks();
        consolidate(&chunks)
    }

    /// A BM25 index containing no documents.
    fn empty_index() -> BM25Index {
        BM25Index {
            chunks: Vec::new(),
            inverted: std::collections::HashMap::new(),
            avg_doc_len: 0.0,
            doc_count: 0,
            doc_freqs: std::collections::HashMap::new(),
            files: std::collections::HashMap::new(),
        }
    }

    #[test]
    fn consolidate_produces_all_artifact_types() {
        let produced = sample_artifacts();

        assert!(!produced.is_empty());
        assert_eq!(produced.bm25_chunks.len(), 2);
        assert!(!produced.edges.is_empty());
        assert!(!produced.facts.is_empty());
        assert_eq!(produced.cache_entries.len(), 2);
    }

    #[test]
    fn consolidate_empty_input_produces_empty_artifacts() {
        let produced = consolidate(&[]);

        assert!(produced.is_empty());
    }

    #[test]
    fn consolidate_code_only_produces_empty_external_artifacts() {
        // A plain code chunk: no external chunk is present in the input.
        let code_chunk = ContentChunk::from(crate::core::bm25_index::CodeChunk {
            file_path: "src/main.rs".into(),
            symbol_name: "main".into(),
            kind: ChunkKind::Function,
            start_line: 1,
            end_line: 5,
            content: "fn main() {}".into(),
            tokens: vec![],
            token_count: 0,
        });

        let produced = consolidate(&[code_chunk]);

        assert!(produced.edges.is_empty());
        assert!(produced.facts.is_empty());
        assert!(produced.cache_entries.is_empty());
    }

    #[test]
    fn consolidation_summary_counts_correctly() {
        let counts = sample_artifacts().summary();

        assert_eq!(counts.chunks_indexed, 2);
        assert!(counts.edges_created > 0);
        assert!(counts.facts_extracted > 0);
        assert_eq!(counts.cache_entries_stored, 2);
    }

    #[test]
    fn apply_artifacts_to_bm25() {
        let produced = sample_artifacts();
        let mut index = empty_index();

        let outcome = apply_artifacts(&produced, Some(&mut index), None, None);

        assert_eq!(outcome.chunks_indexed, 2);
        assert_eq!(index.doc_count, 2);
        assert_eq!(index.external_chunk_count(), 2);
    }

    #[test]
    fn apply_artifacts_to_graph() {
        let produced = sample_artifacts();
        let mut graph: Vec<IndexEdge> = Vec::new();

        let outcome = apply_artifacts(&produced, None, Some(&mut graph), None);

        assert!(outcome.edges_created > 0);
        assert!(!graph.is_empty());
        assert!(graph.iter().any(|edge| edge.to == "src/auth.rs"));
    }

    #[test]
    fn apply_artifacts_to_session_cache() {
        let produced = sample_artifacts();
        let mut cache = SessionCache::new();

        let outcome = apply_artifacts(&produced, None, None, Some(&mut cache));

        assert_eq!(outcome.cache_entries_stored, 2);
        // Cache keys are the chunks' `file_path` values.
        assert!(cache.get("github://issues/42").is_some());
        assert!(cache.get("github://pull_requests/100").is_some());
    }

    #[test]
    fn apply_artifacts_to_all_systems() {
        let produced = sample_artifacts();
        let mut index = empty_index();
        let mut graph: Vec<IndexEdge> = Vec::new();
        let mut cache = SessionCache::new();

        let outcome = apply_artifacts(
            &produced,
            Some(&mut index),
            Some(&mut graph),
            Some(&mut cache),
        );

        assert!(outcome.chunks_indexed > 0);
        assert!(outcome.edges_created > 0);
        assert!(outcome.facts_extracted > 0);
        assert!(outcome.cache_entries_stored > 0);
    }
}