1pub mod binary_index;
21mod chunker;
22mod falsification;
23pub mod fingerprint;
24mod indexer;
25pub mod persistence;
26pub mod profiling;
27pub mod quantization;
28pub mod query_cache;
29mod retriever;
30pub mod tui;
31mod types;
32mod validator;
33
34#[allow(unused_imports)]
36pub use binary_index::{
37 BinaryIndexError, BinaryIndexReader, BinaryIndexWriter, DocumentEntry, IndexHeader, Posting,
38 MAGIC, VERSION,
39};
40#[allow(unused_imports)]
41pub use chunker::SemanticChunker;
42#[allow(unused_imports)]
43pub use fingerprint::{blake3_hash, ChunkerConfig, DocumentFingerprint};
44#[allow(unused_imports)]
45pub use indexer::HeijunkaReindexer;
46#[allow(unused_imports)]
48pub use profiling::{
49 get_summary, record_cache_hit, record_cache_miss, record_query_latency, reset_metrics, span,
50 Counter, Histogram, HistogramBucket, MetricsSummary, RagMetrics, SpanStats, TimedSpan,
51 GLOBAL_METRICS,
52};
53#[allow(unused_imports)]
55pub use query_cache::{CacheStats, CachedPlan, QueryPlanCache};
56#[allow(unused_imports)]
58pub use quantization::{
59 CalibrationStats, QuantizationError, QuantizationParams, QuantizedEmbedding, RescoreResult,
60 RescoreRetriever, RescoreRetrieverConfig, SimdBackend,
61};
62#[allow(unused_imports)]
63pub use retriever::{HybridRetriever, InvertedIndex};
64#[allow(unused_imports)]
65pub use types::RetrievalResult;
66#[allow(unused_imports)]
67pub use types::*;
68#[allow(unused_imports)]
69pub use validator::JidokaIndexValidator;
70
71use serde::{Deserialize, Serialize};
72use std::collections::HashMap;
73use std::path::PathBuf;
74
75#[derive(Debug)]
83pub struct RagOracle {
84 index: DocumentIndex,
86 retriever: HybridRetriever,
88 validator: JidokaIndexValidator,
90 config: RagOracleConfig,
92}
93
94#[derive(Debug, Clone, Serialize, Deserialize)]
96pub struct RagOracleConfig {
97 pub repositories: Vec<PathBuf>,
99 pub sources: Vec<DocumentSource>,
101 pub chunk_size: usize,
103 pub chunk_overlap: usize,
105 pub top_k: usize,
107 pub rerank_depth: usize,
109}
110
111impl Default for RagOracleConfig {
112 fn default() -> Self {
113 Self {
114 repositories: vec![],
115 sources: vec![
116 DocumentSource::ClaudeMd,
117 DocumentSource::ReadmeMd,
118 DocumentSource::CargoToml,
119 DocumentSource::DocsDir,
120 ],
121 chunk_size: 512,
122 chunk_overlap: 64,
123 top_k: 5,
124 rerank_depth: 20,
125 }
126 }
127}
128
129#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
131pub enum DocumentSource {
132 ClaudeMd,
134 ReadmeMd,
136 CargoToml,
138 PyProjectToml,
140 DocsDir,
142 ExamplesDir,
144 Docstrings,
146 PythonSource,
148 PythonTests,
150}
151impl DocumentSource {
152 pub fn priority(&self) -> u8 {
154 match self {
155 Self::ClaudeMd => 0,
156 Self::ReadmeMd | Self::CargoToml | Self::PyProjectToml => 1,
157 Self::DocsDir | Self::PythonSource => 2,
158 Self::ExamplesDir | Self::Docstrings | Self::PythonTests => 3,
159 }
160 }
161
162 pub fn glob_pattern(&self) -> &'static str {
164 match self {
165 Self::ClaudeMd => "CLAUDE.md",
166 Self::ReadmeMd => "README.md",
167 Self::CargoToml => "Cargo.toml",
168 Self::PyProjectToml => "pyproject.toml",
169 Self::DocsDir => "docs/**/*.md",
170 Self::ExamplesDir => "examples/**/*.rs",
171 Self::Docstrings => "src/**/*.rs",
172 Self::PythonSource => "src/**/*.py",
173 Self::PythonTests => "tests/**/*.py",
174 }
175 }
176}
177
178#[derive(Debug, Default, Clone, Serialize, Deserialize)]
180pub struct DocumentIndex {
181 documents: HashMap<String, IndexedDocument>,
183 fingerprints: HashMap<String, DocumentFingerprint>,
185 total_chunks: usize,
187}
188
189#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct IndexedDocument {
192 pub id: String,
194 pub component: String,
196 pub path: PathBuf,
198 pub source_type: DocumentSource,
200 pub chunks: Vec<DocumentChunk>,
202}
203
204#[derive(Debug, Clone, Serialize, Deserialize)]
206pub struct DocumentChunk {
207 pub id: String,
209 pub content: String,
211 pub start_line: usize,
213 pub end_line: usize,
215 pub content_hash: [u8; 32],
217}
218impl RagOracle {
219 pub fn new() -> Self {
221 Self::with_config(RagOracleConfig::default())
222 }
223
224 pub fn with_config(config: RagOracleConfig) -> Self {
226 Self {
227 index: DocumentIndex::default(),
228 retriever: HybridRetriever::new(),
229 validator: JidokaIndexValidator::new(384), config,
231 }
232 }
233
234 pub fn query(&self, query: &str) -> Vec<RetrievalResult> {
236 self.retriever.retrieve(query, &self.index, self.config.top_k)
237 }
238
239 pub fn stats(&self) -> IndexStats {
241 IndexStats {
242 total_documents: self.index.documents.len(),
243 total_chunks: self.index.total_chunks,
244 components: self
245 .index
246 .documents
247 .values()
248 .map(|d| d.component.clone())
249 .collect::<std::collections::HashSet<_>>()
250 .len(),
251 }
252 }
253
254 pub fn needs_reindex(&self, doc_id: &str, current_hash: [u8; 32]) -> bool {
256 self.index
257 .fingerprints
258 .get(doc_id)
259 .map(|fp| fp.content_hash != current_hash)
260 .unwrap_or(true)
261 }
262}
263
264impl Default for RagOracle {
265 fn default() -> Self {
266 Self::new()
267 }
268}
269
/// Summary counts describing an index, as returned by `RagOracle::stats`.
///
/// The default value has every count set to zero.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IndexStats {
    /// Number of documents in the index.
    pub total_documents: usize,
    /// Number of chunks in the index.
    pub total_chunks: usize,
    /// Number of distinct component names across the indexed documents.
    pub components: usize,
}
280
#[cfg(test)]
mod tests {
    use super::*;

    // A freshly constructed oracle reports an empty index.
    #[test]
    fn test_rag_oracle_creation() {
        let oracle = RagOracle::new();
        let stats = oracle.stats();
        assert_eq!(stats.total_documents, 0);
        assert_eq!(stats.total_chunks, 0);
    }

    // `Default` yields the same empty oracle as `new`.
    #[test]
    fn test_rag_oracle_default() {
        let oracle = RagOracle::default();
        let stats = oracle.stats();
        assert_eq!(stats.total_documents, 0);
        assert_eq!(stats.components, 0);
    }

    // A custom configuration does not pre-populate the index.
    #[test]
    fn test_rag_oracle_with_config() {
        let config = RagOracleConfig {
            repositories: vec![PathBuf::from("/test")],
            sources: vec![DocumentSource::ClaudeMd],
            chunk_size: 256,
            chunk_overlap: 32,
            top_k: 10,
            rerank_depth: 50,
        };
        let oracle = RagOracle::with_config(config);
        let stats = oracle.stats();
        assert_eq!(stats.total_documents, 0);
    }

    // Querying an empty index returns no results.
    #[test]
    fn test_rag_oracle_query_empty_index() {
        let oracle = RagOracle::new();
        let results = oracle.query("test query");
        assert!(results.is_empty());
    }

    // Pins the priority tier of every source kind.
    #[test]
    fn test_document_source_priority() {
        assert_eq!(DocumentSource::ClaudeMd.priority(), 0);
        assert_eq!(DocumentSource::ReadmeMd.priority(), 1);
        assert_eq!(DocumentSource::CargoToml.priority(), 1);
        assert_eq!(DocumentSource::PyProjectToml.priority(), 1);
        assert_eq!(DocumentSource::DocsDir.priority(), 2);
        assert_eq!(DocumentSource::PythonSource.priority(), 2);
        assert_eq!(DocumentSource::ExamplesDir.priority(), 3);
        assert_eq!(DocumentSource::Docstrings.priority(), 3);
        assert_eq!(DocumentSource::PythonTests.priority(), 3);
    }

    // Pins the glob pattern of every source kind.
    #[test]
    fn test_document_source_glob_patterns() {
        assert_eq!(DocumentSource::ClaudeMd.glob_pattern(), "CLAUDE.md");
        assert_eq!(DocumentSource::ReadmeMd.glob_pattern(), "README.md");
        assert_eq!(DocumentSource::CargoToml.glob_pattern(), "Cargo.toml");
        assert_eq!(DocumentSource::PyProjectToml.glob_pattern(), "pyproject.toml");
        assert_eq!(DocumentSource::DocsDir.glob_pattern(), "docs/**/*.md");
        assert_eq!(DocumentSource::ExamplesDir.glob_pattern(), "examples/**/*.rs");
        assert_eq!(DocumentSource::Docstrings.glob_pattern(), "src/**/*.rs");
        assert_eq!(DocumentSource::PythonSource.glob_pattern(), "src/**/*.py");
        assert_eq!(DocumentSource::PythonTests.glob_pattern(), "tests/**/*.py");
    }

    // Pins the scalar defaults of the configuration.
    #[test]
    fn test_config_defaults() {
        let config = RagOracleConfig::default();
        assert_eq!(config.chunk_size, 512);
        assert_eq!(config.chunk_overlap, 64);
        assert_eq!(config.top_k, 5);
        assert_eq!(config.rerank_depth, 20);
        assert!(config.repositories.is_empty());
        assert!(!config.sources.is_empty());
    }

    // Pins which sources are enabled by default.
    #[test]
    fn test_config_default_sources() {
        let config = RagOracleConfig::default();
        assert!(config.sources.contains(&DocumentSource::ClaudeMd));
        assert!(config.sources.contains(&DocumentSource::ReadmeMd));
        assert!(config.sources.contains(&DocumentSource::CargoToml));
        assert!(config.sources.contains(&DocumentSource::DocsDir));
    }

    // A document without a stored fingerprint always needs indexing.
    #[test]
    fn test_needs_reindex_new_document() {
        let oracle = RagOracle::new();
        let hash = [0u8; 32];
        assert!(oracle.needs_reindex("new_doc", hash));
    }

    // The default index is empty.
    #[test]
    fn test_document_index_default() {
        let index = DocumentIndex::default();
        assert!(index.documents.is_empty());
        assert!(index.fingerprints.is_empty());
        assert_eq!(index.total_chunks, 0);
    }

    // An empty index has no components.
    #[test]
    fn test_index_stats_components() {
        let oracle = RagOracle::new();
        let stats = oracle.stats();
        assert_eq!(stats.components, 0);
    }

    mod proptests {
        use super::*;
        use proptest::prelude::*;

        // Every `DocumentSource` variant, shared by the per-variant properties
        // below so the list is maintained in one place.
        const ALL_SOURCES: [DocumentSource; 9] = [
            DocumentSource::ClaudeMd,
            DocumentSource::ReadmeMd,
            DocumentSource::CargoToml,
            DocumentSource::PyProjectToml,
            DocumentSource::DocsDir,
            DocumentSource::ExamplesDir,
            DocumentSource::Docstrings,
            DocumentSource::PythonSource,
            DocumentSource::PythonTests,
        ];

        proptest! {
            #![proptest_config(ProptestConfig::with_cases(50))]

            // Any query against an empty oracle yields no results.
            #[test]
            fn prop_empty_oracle_returns_empty(query in "[a-z ]{1,100}") {
                let oracle = RagOracle::new();
                let results = oracle.query(&query);
                prop_assert!(results.is_empty());
            }

            // An overlap derived as at most half the chunk size never exceeds it.
            #[test]
            fn prop_config_overlap_less_than_size(
                chunk_size in 64usize..1024,
                overlap_factor in 0.0f64..0.5
            ) {
                let overlap = (chunk_size as f64 * overlap_factor) as usize;
                let config = RagOracleConfig {
                    chunk_size,
                    chunk_overlap: overlap,
                    ..Default::default()
                };
                prop_assert!(config.chunk_overlap <= config.chunk_size);
            }

            // Unknown documents need indexing regardless of id or hash.
            #[test]
            fn prop_needs_reindex_new_doc(
                doc_id in "[a-z]{3,20}",
                hash in prop::array::uniform32(0u8..)
            ) {
                let oracle = RagOracle::new();
                prop_assert!(oracle.needs_reindex(&doc_id, hash));
            }

            // Every source reports a priority tier of at most 3.
            #[test]
            fn prop_source_priority_valid(source_idx in 0usize..ALL_SOURCES.len()) {
                let source = ALL_SOURCES[source_idx];
                prop_assert!(source.priority() <= 3);
            }

            // Every source has a non-empty glob pattern.
            #[test]
            fn prop_glob_pattern_nonempty(source_idx in 0usize..ALL_SOURCES.len()) {
                let source = ALL_SOURCES[source_idx];
                prop_assert!(!source.glob_pattern().is_empty());
            }

            // Statistics of a fresh oracle are all zero on every run.
            #[test]
            fn prop_stats_consistent(_seed in 0u64..1000) {
                let oracle = RagOracle::new();
                let stats = oracle.stats();
                prop_assert_eq!(stats.total_documents, 0);
                prop_assert_eq!(stats.total_chunks, 0);
                prop_assert_eq!(stats.components, 0);
            }
        }
    }
}