mnemefusion-core 0.1.4

Unified memory engine for AI applications - Core library
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
//! Configuration types for MnemeFusion
//!
//! This module defines configuration options for the memory engine.

use crate::query::FusionStrategy;

/// Configuration for the MnemeFusion memory engine
///
/// All fields are public; prefer the `with_*` builder methods (some of which
/// clamp values into valid ranges) and call `validate()` before use.
#[derive(Debug, Clone)]
pub struct Config {
    /// Dimension of embedding vectors
    ///
    /// Default: 384 (all-MiniLM-L6-v2)
    pub embedding_dim: usize,

    /// Half-life for temporal decay in hours
    /// After this many hours, a memory's temporal relevance score is halved
    ///
    /// Default: 168.0 (1 week)
    pub temporal_decay_hours: f32,

    /// Maximum number of hops for causal graph traversal
    ///
    /// Default: 3
    pub causal_max_hops: usize,

    /// Enable automatic entity extraction
    ///
    /// **Language Note**: Entity extraction currently uses English-only stop words
    /// and capitalization rules. For non-English content, consider disabling this
    /// feature and using your own NER pipeline or relying on semantic search.
    ///
    /// See documentation for multilingual usage examples.
    ///
    /// Default: true
    pub entity_extraction_enabled: bool,

    /// Minimum confidence threshold for causal links (0.0 to 1.0)
    ///
    /// Default: 0.5
    pub causal_min_confidence: f32,

    /// HNSW M parameter (connectivity)
    /// Higher values = better recall, more memory
    ///
    /// Default: 16
    pub hnsw_m: usize,

    /// HNSW ef_construction parameter
    /// Higher values = better index quality, slower construction
    ///
    /// Default: 128
    pub hnsw_ef_construction: usize,

    /// HNSW ef_search parameter
    /// Higher values = better recall, slower search
    ///
    /// Default: 64
    pub hnsw_ef_search: usize,

    /// Metadata fields to index for efficient filtering
    /// These fields will have dedicated indexes for fast lookup
    /// Example: vec!["type".to_string(), "category".to_string(), "priority".to_string()]
    ///
    /// Default: empty (no indexed fields)
    pub indexed_metadata: Vec<String>,

    /// Minimum semantic similarity threshold for fusion results (0.0 to 1.0)
    ///
    /// Memories with semantic_score below this threshold are excluded from 4D fusion results.
    /// This ensures that semantic relevance is mandatory - other dimensions (temporal, entity,
    /// causal) can only boost already-relevant memories, not surface irrelevant ones.
    ///
    /// Default: 0.15 (15% minimum semantic relevance)
    /// Recommended range: 0.10 to 0.20
    ///
    /// Lower values (e.g., 0.05): More permissive, may include weakly relevant memories
    /// Higher values (e.g., 0.30): Stricter, only strongly relevant memories
    ///
    /// Set to 0.0 to disable the filter (not recommended for production)
    pub fusion_semantic_threshold: f32,

    /// Pre-fusion semantic filtering threshold (0.0 to 1.0)
    ///
    /// Semantic search results below this threshold are filtered OUT before fusion.
    /// This is stricter than fusion_semantic_threshold and reduces noise in the semantic pathway.
    ///
    /// Default: 0.3 (30% minimum cosine similarity)
    /// Recommended range: 0.20 to 0.40
    ///
    /// This helps improve precision by removing low-quality semantic matches early.
    /// Set to 0.0 to disable pre-fusion filtering (uses only fusion_semantic_threshold).
    pub semantic_prefilter_threshold: f32,

    /// Fusion strategy (Weighted or ReciprocalRank)
    ///
    /// - Weighted: Uses intent-adaptive weights (original approach)
    /// - ReciprocalRank: Uses RRF formula (robust to score scale differences)
    ///
    /// Default: ReciprocalRank
    pub fusion_strategy: FusionStrategy,

    /// RRF k parameter (only used when fusion_strategy is ReciprocalRank)
    ///
    /// Default: 60 (from Cormack et al. 2009 RRF paper)
    pub rrf_k: f32,

    /// SLM configuration (optional)
    ///
    /// When enabled, uses Small Language Model for semantic intent classification
    /// instead of pattern matching. Improves classification accuracy from ~35% to 85%+.
    ///
    /// Default: None (disabled)
    ///
    /// Requires `slm` feature to be enabled at compile time:
    /// ```toml
    /// mnemefusion-core = { version = "0.1", features = ["slm"] }
    /// ```
    pub slm_config: Option<crate::slm::SlmConfig>,

    /// Enable SLM metadata extraction at ingestion time
    ///
    /// When enabled and slm_config is set, uses Small Language Model to extract rich
    /// metadata (entities, temporal markers, causal relationships, topics) during
    /// memory ingestion. This enables fast, accurate retrieval without query-time
    /// SLM inference.
    ///
    /// Key principle: "Pay the cost once" - ingestion can be slow (3-5s), but queries
    /// must be fast (<100ms).
    ///
    /// Default: true (enabled when slm_config is set)
    ///
    /// Requires `slm` feature to be enabled at compile time:
    /// ```toml
    /// mnemefusion-core = { version = "0.1", features = ["slm"] }
    /// ```
    pub slm_metadata_extraction_enabled: bool,

    /// Enable SLM query classification at query time
    ///
    /// When enabled and slm_config is set, uses Small Language Model to classify
    /// query intent at query time. This adds ~3 seconds latency to every query.
    ///
    /// **IMPORTANT**: This is typically NOT recommended for production use.
    /// With rich metadata extracted at ingestion time, RRF fusion
    /// can automatically balance retrieval pathways without classification.
    ///
    /// Default: false (disabled - rely on RRF fusion instead)
    ///
    /// Only enable for:
    /// - Experimentation and benchmarking
    /// - Comparing classification vs no-classification approaches
    /// - Cases where ingestion-time metadata is unavailable
    ///
    /// Requires `slm` feature to be enabled at compile time:
    /// ```toml
    /// mnemefusion-core = { version = "0.1", features = ["slm"] }
    /// ```
    pub slm_query_classification_enabled: bool,

    /// Number of LLM extraction passes per document during ingestion
    ///
    /// Multiple passes with different temperatures capture different facts,
    /// producing richer entity profiles. Pass 0 uses deterministic settings
    /// (temp=0.1), subsequent passes use moderate diversity (temp=0.3) with
    /// unique seeds.
    ///
    /// Default: 1 (single pass, backward compatible)
    /// Recommended for quality: 3
    ///
    /// Higher values increase ingestion time linearly but improve profile
    /// completeness. Facts are deduplicated across passes via add_fact().
    pub extraction_passes: usize,

    /// Entity types that get their own profiles during extraction.
    ///
    /// Only entities whose `entity_type` (from LLM extraction) matches one of
    /// these types (case-insensitive) will have profiles created. Others are
    /// skipped to prevent junk profiles from generic nouns, pronouns, and
    /// concepts (e.g., "dogs", "They", "basketball").
    ///
    /// Default: `["person", "organization", "location"]`
    ///
    /// Set to empty to allow all entity types (not recommended — causes
    /// profile explosion with diverse extraction prompts).
    pub profile_entity_types: Vec<String>,

    /// Adaptive-K threshold for dynamic result count selection (Top-p / nucleus).
    ///
    /// Instead of always returning exactly `limit` results, applies nucleus (Top-p)
    /// selection on fused scores: converts scores to probabilities via softmax, then
    /// returns results until cumulative probability >= this threshold.
    ///
    /// This prevents low-quality padding from diluting the context window. When the
    /// top few results have high scores and the rest are noise, Adaptive-K returns
    /// only the high-confidence results.
    ///
    /// Research basis: Calvin Ku's Adaptive-K fork of EmergenceMem Simple Fast.
    /// Top-p nucleus selection (p=0.7) with token budget. Average k varies per query.
    ///
    /// Default: 0.0 (disabled — always returns exactly `limit` results)
    /// Recommended: 0.7 (return results covering 70% of probability mass)
    /// Set to 0.0 to disable (always return `limit` results)
    /// Set to 1.0 to always return all results (equivalent to disabled)
    ///
    /// When enabled, results are bounded by [limit/3, limit] to prevent
    /// pathological cases (too few or too many results).
    pub adaptive_k_threshold: f32,

    /// Path to a local embedding model directory for automatic text vectorization.
    ///
    /// When set, `add()` and `query()` can be called without supplying an embedding
    /// vector — the engine computes it automatically using fastembed (ONNX Runtime).
    ///
    /// The path should be the root of a fastembed/HF-hub cache directory containing
    /// BGE-base-en-v1.5 model files. Pre-download with:
    /// `python -c "from fastembed import TextEmbedding; TextEmbedding(['hi'])"`
    ///
    /// Requires the `embedding-onnx` feature at compile time.
    ///
    /// Default: `None` (caller must supply embeddings explicitly)
    pub embedding_model: Option<String>,

    /// Path to the LLM model file (.gguf) for entity extraction.
    ///
    /// Convenience alias — equivalent to calling
    /// `engine.with_llm_entity_extraction_from_path(path, ModelTier::Balanced)` after `open()`.
    ///
    /// Requires the `entity-extraction` feature at compile time.
    ///
    /// Default: `None` (entity extraction disabled unless wired separately)
    pub llm_model: Option<String>,

    /// Minimum content size (bytes) to trigger deferred LLM extraction.
    ///
    /// When LLM extraction is enabled and `content.len() >= async_extraction_threshold`,
    /// `add()` stores the memory immediately and queues LLM extraction for later.
    /// Call `flush_extraction_queue()` to process all deferred extractions.
    ///
    /// This allows `add()` to return in ~5ms for large content (instead of ~9s per
    /// document), enabling real-time AI agent usage without blocking conversation flow.
    ///
    /// Default: `0` (always synchronous — backward compatible).
    /// Recommended: `500` (defer documents >= 500 bytes; short messages still sync).
    /// Set to `1` to defer all content when LLM is enabled.
    pub async_extraction_threshold: usize,

    /// Enable pipeline tracing (query).
    ///
    /// When enabled, each `query()` call records a detailed step-by-step
    /// trace accessible via `last_query_trace()`.
    ///
    /// Adds minor overhead from HashMap allocations (~5-10%). Disabled by default.
    ///
    /// Default: `false`
    pub enable_trace: bool,
}

impl Default for Config {
    fn default() -> Self {
        // Defaults are tuned for 384-dim all-MiniLM-L6-v2 embeddings with
        // RRF fusion and no SLM/LLM assistance; see field docs for rationale.
        let profile_entity_types = ["person", "organization", "location"]
            .iter()
            .map(|s| s.to_string())
            .collect();

        Self {
            // Embedding / temporal / causal dimensions
            embedding_dim: 384,
            temporal_decay_hours: 168.0,
            causal_max_hops: 3,
            entity_extraction_enabled: true,
            causal_min_confidence: 0.5,
            // HNSW vector-index tuning
            hnsw_m: 16,
            hnsw_ef_construction: 128,
            hnsw_ef_search: 64,
            indexed_metadata: Vec::new(),
            // Fusion: 15% post-fusion floor, 30% pre-fusion filter
            // (aggregation queries use 0.5x = 0.15 via adaptive logic in planner),
            // RRF strategy with k=60 from the RRF paper.
            fusion_semantic_threshold: 0.15,
            semantic_prefilter_threshold: 0.3,
            fusion_strategy: FusionStrategy::default(),
            rrf_k: 60.0,
            // SLM: disabled unless configured; metadata extraction on,
            // query-time classification off (rely on RRF fusion).
            slm_config: None,
            slm_metadata_extraction_enabled: true,
            slm_query_classification_enabled: false,
            // Extraction / result-selection behavior (backward compatible)
            extraction_passes: 1,
            profile_entity_types,
            adaptive_k_threshold: 0.0,
            embedding_model: None,
            llm_model: None,
            async_extraction_threshold: 0,
            enable_trace: false,
        }
    }
}

impl Config {
    /// Create a new configuration with default values
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the embedding dimension
    pub fn with_embedding_dim(mut self, dim: usize) -> Self {
        self.embedding_dim = dim;
        self
    }

    /// Set the temporal decay half-life in hours
    pub fn with_temporal_decay_hours(mut self, hours: f32) -> Self {
        self.temporal_decay_hours = hours;
        self
    }

    /// Set the maximum causal traversal hops
    pub fn with_causal_max_hops(mut self, hops: usize) -> Self {
        self.causal_max_hops = hops;
        self
    }

    /// Enable or disable entity extraction
    ///
    /// **Language Note**: Entity extraction currently supports English only.
    /// If you're working with non-English content, set this to `false` and
    /// use multilingual embeddings for semantic search.
    ///
    /// # Examples
    ///
    /// ```
    /// use mnemefusion_core::Config;
    ///
    /// // For English content (default)
    /// let config = Config::new().with_entity_extraction(true);
    ///
    /// // For non-English content
    /// let config = Config::new().with_entity_extraction(false);
    /// ```
    pub fn with_entity_extraction(mut self, enabled: bool) -> Self {
        self.entity_extraction_enabled = enabled;
        self
    }

    /// Set HNSW parameters for vector index
    pub fn with_hnsw_params(mut self, m: usize, ef_construction: usize, ef_search: usize) -> Self {
        self.hnsw_m = m;
        self.hnsw_ef_construction = ef_construction;
        self.hnsw_ef_search = ef_search;
        self
    }

    /// Set metadata fields to index for efficient filtering
    pub fn with_indexed_metadata(mut self, fields: Vec<String>) -> Self {
        self.indexed_metadata = fields;
        self
    }

    /// Add a metadata field to the indexed set
    pub fn add_indexed_field(mut self, field: impl Into<String>) -> Self {
        self.indexed_metadata.push(field.into());
        self
    }

    /// Set the minimum semantic similarity threshold for fusion results
    ///
    /// Memories with semantic_score below this threshold are excluded from 4D fusion results.
    /// This ensures that semantic relevance is mandatory.
    ///
    /// The value is clamped to [0.0, 1.0].
    ///
    /// # Arguments
    ///
    /// * `threshold` - Minimum semantic score (0.0 to 1.0). Default: 0.15
    ///
    /// # Examples
    ///
    /// ```
    /// use mnemefusion_core::Config;
    ///
    /// // Strict filter (only highly relevant memories)
    /// let config = Config::default().with_fusion_semantic_threshold(0.30);
    ///
    /// // Permissive filter (allow weakly relevant memories)
    /// let config = Config::default().with_fusion_semantic_threshold(0.05);
    ///
    /// // Disable filter (not recommended for production)
    /// let config = Config::default().with_fusion_semantic_threshold(0.0);
    /// ```
    pub fn with_fusion_semantic_threshold(mut self, threshold: f32) -> Self {
        self.fusion_semantic_threshold = threshold.clamp(0.0, 1.0);
        self
    }

    /// Set the fusion strategy
    ///
    /// # Arguments
    ///
    /// * `strategy` - Either Weighted or ReciprocalRank
    ///
    /// # Examples
    ///
    /// ```
    /// use mnemefusion_core::{Config, query::FusionStrategy};
    ///
    /// // Use RRF (default, recommended)
    /// let config = Config::default().with_fusion_strategy(FusionStrategy::ReciprocalRank);
    ///
    /// // Use weighted fusion (original approach)
    /// let config = Config::default().with_fusion_strategy(FusionStrategy::Weighted);
    /// ```
    pub fn with_fusion_strategy(mut self, strategy: FusionStrategy) -> Self {
        self.fusion_strategy = strategy;
        self
    }

    /// Set the RRF k parameter
    ///
    /// Only used when fusion_strategy is ReciprocalRank.
    /// The value is clamped to a minimum of 1.0.
    ///
    /// # Arguments
    ///
    /// * `k` - RRF constant (typically 60). Default: 60
    ///
    /// # Examples
    ///
    /// ```
    /// use mnemefusion_core::Config;
    ///
    /// // Default k=60 (from RRF paper)
    /// let config = Config::default();
    ///
    /// // Custom k value
    /// let config = Config::default().with_rrf_k(100.0);
    /// ```
    pub fn with_rrf_k(mut self, k: f32) -> Self {
        self.rrf_k = k.max(1.0);
        self
    }

    /// Set the pre-fusion semantic filter threshold
    ///
    /// Semantic search results below this threshold are filtered out before fusion.
    /// This is stricter than fusion_semantic_threshold and helps improve precision.
    ///
    /// The value is clamped to [0.0, 1.0].
    ///
    /// # Arguments
    ///
    /// * `threshold` - Minimum cosine similarity (0.0 to 1.0). Default: 0.3
    ///
    /// # Examples
    ///
    /// ```
    /// use mnemefusion_core::Config;
    ///
    /// // Strict pre-filter (only high-quality semantic matches)
    /// let config = Config::default().with_semantic_prefilter_threshold(0.4);
    ///
    /// // Permissive pre-filter
    /// let config = Config::default().with_semantic_prefilter_threshold(0.2);
    ///
    /// // Disable pre-filter (use only fusion threshold)
    /// let config = Config::default().with_semantic_prefilter_threshold(0.0);
    /// ```
    pub fn with_semantic_prefilter_threshold(mut self, threshold: f32) -> Self {
        self.semantic_prefilter_threshold = threshold.clamp(0.0, 1.0);
        self
    }

    /// Enable SLM-based intent classification
    ///
    /// When enabled, uses Small Language Model (Gemma 3 1B) for semantic understanding
    /// of query intent, improving classification accuracy from ~35% to 85%+.
    ///
    /// Falls back to pattern-based classification on any error, ensuring zero regression.
    ///
    /// # Arguments
    ///
    /// * `slm_config` - SLM configuration including model ID and cache directory
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use mnemefusion_core::{Config, SlmConfig};
    ///
    /// // Enable SLM with default configuration
    /// let config = Config::default()
    ///     .with_slm(SlmConfig::default());
    ///
    /// // Enable SLM with custom model
    /// let config = Config::default()
    ///     .with_slm(SlmConfig::new("google/gemma-3-1b")
    ///         .with_timeout_ms(100)
    ///         .with_min_confidence(0.6));
    /// ```
    ///
    /// # Feature Flag
    ///
    /// Requires `slm` feature to be enabled:
    /// ```toml
    /// [dependencies]
    /// mnemefusion-core = { version = "0.1", features = ["slm"] }
    /// ```
    pub fn with_slm(mut self, slm_config: crate::slm::SlmConfig) -> Self {
        self.slm_config = Some(slm_config);
        self
    }

    /// Disable SLM classification (use pattern-based only)
    ///
    /// This is the default behavior. Use this method to explicitly disable SLM
    /// after it has been enabled.
    pub fn without_slm(mut self) -> Self {
        self.slm_config = None;
        self
    }

    /// Enable or disable SLM metadata extraction at ingestion time
    ///
    /// When enabled and slm_config is set, uses Small Language Model to extract
    /// rich metadata during memory ingestion. This metadata enables fast, accurate
    /// retrieval without query-time SLM inference.
    ///
    /// # Arguments
    ///
    /// * `enabled` - Whether to enable SLM metadata extraction
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use mnemefusion_core::{Config, SlmConfig};
    ///
    /// // Enable SLM for both classification and metadata extraction
    /// let config = Config::default()
    ///     .with_slm(SlmConfig::default())
    ///     .with_slm_metadata_extraction(true);
    ///
    /// // Enable SLM for classification only, skip metadata extraction
    /// let config = Config::default()
    ///     .with_slm(SlmConfig::default())
    ///     .with_slm_metadata_extraction(false);
    /// ```
    pub fn with_slm_metadata_extraction(mut self, enabled: bool) -> Self {
        self.slm_metadata_extraction_enabled = enabled;
        self
    }

    /// Enable or disable SLM query classification at query time
    ///
    /// When enabled and slm_config is set, uses Small Language Model to classify
    /// query intent. This adds ~3 seconds latency per query.
    ///
    /// **NOT RECOMMENDED** for production. With rich metadata from ingestion-time
    /// SLM extraction, RRF fusion handles pathway balancing automatically.
    ///
    /// # Arguments
    ///
    /// * `enabled` - Whether to enable SLM query classification
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use mnemefusion_core::{Config, SlmConfig};
    ///
    /// // Default: SLM classification disabled at query time (recommended)
    /// let config = Config::default()
    ///     .with_slm(SlmConfig::default());
    ///
    /// // Enable for experimentation only
    /// let config = Config::default()
    ///     .with_slm(SlmConfig::default())
    ///     .with_slm_query_classification(true);
    /// ```
    pub fn with_slm_query_classification(mut self, enabled: bool) -> Self {
        self.slm_query_classification_enabled = enabled;
        self
    }

    /// Set the number of LLM extraction passes per document
    ///
    /// Multiple passes with different temperatures produce richer entity profiles
    /// by capturing different facts each time. The first pass is deterministic;
    /// subsequent passes use moderate randomness.
    ///
    /// # Arguments
    ///
    /// * `passes` - Number of extraction passes (1..=10). Clamped to this range.
    ///
    /// # Examples
    ///
    /// ```
    /// use mnemefusion_core::Config;
    ///
    /// // Single pass (default, fastest)
    /// let config = Config::default().with_extraction_passes(1);
    ///
    /// // Three passes (recommended for quality)
    /// let config = Config::default().with_extraction_passes(3);
    /// ```
    pub fn with_extraction_passes(mut self, passes: usize) -> Self {
        self.extraction_passes = passes.clamp(1, 10);
        self
    }

    /// Set which entity types get their own profiles during extraction.
    ///
    /// Only entities matching these types (case-insensitive) will have profiles
    /// created. An empty list allows all entity types.
    ///
    /// # Arguments
    ///
    /// * `types` - List of allowed entity types (e.g., `["person", "organization"]`)
    pub fn with_profile_entity_types(mut self, types: Vec<String>) -> Self {
        self.profile_entity_types = types;
        self
    }

    /// Set the Adaptive-K (Top-p) threshold for dynamic result count selection.
    ///
    /// When > 0.0, applies nucleus selection to return only high-confidence results
    /// instead of always returning exactly `limit` results.
    ///
    /// The value is clamped to [0.0, 1.0].
    ///
    /// # Arguments
    ///
    /// * `threshold` - Top-p threshold (0.0 to 1.0). 0.0 disables, 0.7 recommended.
    pub fn with_adaptive_k(mut self, threshold: f32) -> Self {
        self.adaptive_k_threshold = threshold.clamp(0.0, 1.0);
        self
    }

    /// Set the path to a local embedding model directory.
    ///
    /// When set, the engine automatically computes embeddings for `add()` and
    /// `query()` calls that do not supply an explicit embedding vector.
    ///
    /// Requires the `embedding-onnx` feature at compile time.
    pub fn with_embedding_model(mut self, path: impl Into<String>) -> Self {
        self.embedding_model = Some(path.into());
        self
    }

    /// Set the path to the LLM model file (.gguf) for entity extraction.
    ///
    /// Requires the `entity-extraction` feature at compile time.
    pub fn with_llm_model(mut self, path: impl Into<String>) -> Self {
        self.llm_model = Some(path.into());
        self
    }

    /// Set the content size threshold for deferred LLM extraction.
    ///
    /// Content >= `threshold` bytes will be stored immediately and queued for
    /// deferred LLM extraction. Call `flush_extraction_queue()` to process.
    /// Set to `0` to always run LLM extraction synchronously (default).
    pub fn with_async_extraction_threshold(mut self, threshold: usize) -> Self {
        self.async_extraction_threshold = threshold;
        self
    }

    /// Enable or disable pipeline tracing.
    ///
    /// When enabled, `query()` records step-by-step traces with timing and data
    /// at every pipeline stage. Access via `last_query_trace()`.
    pub fn with_trace(mut self, enabled: bool) -> Self {
        self.enable_trace = enabled;
        self
    }

    /// Validate the configuration
    ///
    /// # Errors
    ///
    /// Returns a detailed [`crate::Error::Configuration`] if the configuration
    /// is invalid.
    ///
    /// **Note**: Also emits a `tracing` warning for suboptimal configurations
    /// (e.g., entity extraction enabled for potentially non-English content).
    pub fn validate(&self) -> Result<(), crate::Error> {
        // Emit a tracing warning for entity extraction (English-only feature).
        if self.entity_extraction_enabled {
            tracing::warn!(
                "Entity extraction is enabled. This feature currently supports English only. \
                For non-English content, consider disabling with .with_entity_extraction(false)"
            );
        }

        if self.embedding_dim == 0 {
            return Err(crate::Error::Configuration(
                "embedding_dim must be greater than 0. Common values: 384 (MiniLM), 768 (BERT), 1536 (OpenAI)".to_string(),
            ));
        }

        // Reject unusually large dimensions: they consume excessive memory.
        if self.embedding_dim > 4096 {
            return Err(crate::Error::Configuration(
                format!("embedding_dim of {} is unusually large. This will consume significant memory. Typical values are 384-1536.", self.embedding_dim)
            ));
        }

        if self.temporal_decay_hours <= 0.0 {
            return Err(crate::Error::Configuration(
                "temporal_decay_hours must be positive. Recommended: 168.0 (1 week)".to_string(),
            ));
        }

        if self.causal_max_hops == 0 {
            return Err(crate::Error::Configuration(
                "causal_max_hops must be greater than 0. Recommended: 2-5".to_string(),
            ));
        }

        // Reject very large hop counts: graph traversal cost grows with hops.
        if self.causal_max_hops > 10 {
            return Err(crate::Error::Configuration(format!(
                "causal_max_hops of {} may be too large and cause slow queries. Recommended: 2-5",
                self.causal_max_hops
            )));
        }

        if !(0.0..=1.0).contains(&self.causal_min_confidence) {
            return Err(crate::Error::Configuration(format!(
                "causal_min_confidence must be between 0.0 and 1.0, got {}",
                self.causal_min_confidence
            )));
        }

        if self.hnsw_m == 0 {
            return Err(crate::Error::Configuration(
                "hnsw_m must be greater than 0. Recommended: 12-48 (default: 16)".to_string(),
            ));
        }

        if self.hnsw_m > 100 {
            return Err(crate::Error::Configuration(format!(
                "hnsw_m of {} is very large and will consume excessive memory. Recommended: 12-48",
                self.hnsw_m
            )));
        }

        if self.hnsw_ef_construction < 10 {
            return Err(crate::Error::Configuration(
                "hnsw_ef_construction should be at least 10 for reasonable index quality. Recommended: 100-500".to_string(),
            ));
        }

        if self.hnsw_ef_search == 0 {
            return Err(crate::Error::Configuration(
                "hnsw_ef_search must be greater than 0. Recommended: 64-200".to_string(),
            ));
        }

        if !(1..=10).contains(&self.extraction_passes) {
            return Err(crate::Error::Configuration(format!(
                "extraction_passes must be between 1 and 10, got {}. Recommended: 1-3",
                self.extraction_passes
            )));
        }

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_default_config() {
        // The zero-arg default must match the documented baseline values.
        let cfg = Config::default();
        assert!(cfg.entity_extraction_enabled);
        assert_eq!(cfg.causal_max_hops, 3);
        assert_eq!(cfg.temporal_decay_hours, 168.0);
        assert_eq!(cfg.embedding_dim, 384);
    }

    #[test]
    fn test_config_builder() {
        // Builder methods are independent setters, so ordering is irrelevant.
        let cfg = Config::new()
            .with_causal_max_hops(5)
            .with_temporal_decay_hours(336.0)
            .with_embedding_dim(512);

        assert_eq!(cfg.causal_max_hops, 5);
        assert_eq!(cfg.temporal_decay_hours, 336.0);
        assert_eq!(cfg.embedding_dim, 512);
    }

    #[test]
    fn test_config_validation() {
        let config = Config::default();
        assert!(config.validate().is_ok());

        let mut bad_config = Config::default();
        bad_config.embedding_dim = 0;
        assert!(bad_config.validate().is_err());

        let mut bad_config = Config::default();
        bad_config.temporal_decay_hours = -1.0;
        assert!(bad_config.validate().is_err());

        let mut bad_config = Config::default();
        bad_config.causal_min_confidence = 1.5;
        assert!(bad_config.validate().is_err());
    }

    #[test]
    fn test_indexed_metadata_config() {
        // Default should have no indexed fields
        let config = Config::default();
        assert!(config.indexed_metadata.is_empty());

        // Test with_indexed_metadata
        let config =
            Config::new().with_indexed_metadata(vec!["type".to_string(), "category".to_string()]);
        assert_eq!(config.indexed_metadata.len(), 2);
        assert!(config.indexed_metadata.contains(&"type".to_string()));
        assert!(config.indexed_metadata.contains(&"category".to_string()));

        // Test add_indexed_field
        let config = Config::new()
            .add_indexed_field("type")
            .add_indexed_field("priority");
        assert_eq!(config.indexed_metadata.len(), 2);
        assert!(config.indexed_metadata.contains(&"type".to_string()));
        assert!(config.indexed_metadata.contains(&"priority".to_string()));
    }

    #[test]
    fn test_config_validation_dimension_too_large() {
        let mut config = Config::default();
        config.embedding_dim = 5000;
        let err = config.validate().unwrap_err();
        assert!(matches!(err, crate::Error::Configuration(_)));
        assert!(err.to_string().contains("unusually large"));
    }

    #[test]
    fn test_config_validation_causal_hops_too_large() {
        let mut config = Config::default();
        config.causal_max_hops = 20;
        let err = config.validate().unwrap_err();
        assert!(matches!(err, crate::Error::Configuration(_)));
        assert!(err.to_string().contains("too large"));
    }

    #[test]
    fn test_config_validation_hnsw_m_too_large() {
        let mut config = Config::default();
        config.hnsw_m = 150;
        let err = config.validate().unwrap_err();
        assert!(matches!(err, crate::Error::Configuration(_)));
        assert!(err.to_string().contains("very large"));
    }

    #[test]
    fn test_config_validation_ef_construction_too_small() {
        let mut config = Config::default();
        config.hnsw_ef_construction = 5;
        let err = config.validate().unwrap_err();
        assert!(matches!(err, crate::Error::Configuration(_)));
        assert!(err.to_string().contains("at least 10"));
    }

    #[test]
    fn test_config_validation_ef_search_zero() {
        let mut config = Config::default();
        config.hnsw_ef_search = 0;
        let err = config.validate().unwrap_err();
        assert!(matches!(err, crate::Error::Configuration(_)));
    }

    #[test]
    fn test_config_validation_provides_recommendations() {
        // Test that error messages include recommendations
        let mut config = Config::default();
        config.embedding_dim = 0;
        let err = config.validate().unwrap_err();
        assert!(err.to_string().contains("Common values"));

        let mut config = Config::default();
        config.temporal_decay_hours = 0.0;
        let err = config.validate().unwrap_err();
        assert!(err.to_string().contains("Recommended"));
    }

    #[test]
    fn test_entity_extraction_warning() {
        // Entity extraction enabled should validate successfully but print warning
        let config = Config::default();
        assert!(config.entity_extraction_enabled); // Default is true

        // Should not error, just warn to stderr
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_entity_extraction_disabled_no_warning() {
        // Disabling entity extraction should validate without warnings
        let config = Config::new().with_entity_extraction(false);
        assert!(!config.entity_extraction_enabled);

        // Should validate successfully
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_slm_query_classification_default_disabled() {
        // SLM query classification should be disabled by default
        let config = Config::default();
        assert!(!config.slm_query_classification_enabled);
    }

    #[test]
    fn test_slm_query_classification_builder() {
        // Test enabling SLM query classification
        let config = Config::default().with_slm_query_classification(true);
        assert!(config.slm_query_classification_enabled);

        // Test explicitly disabling
        let config = Config::default().with_slm_query_classification(false);
        assert!(!config.slm_query_classification_enabled);
    }
}