coding_agent_search/search/
policy.rs

1//! Semantic policy contract for cass hybrid search.
2//!
3//! This module is the **single source of truth** for all semantic search policy
4//! decisions.  Downstream beads (asset manifests, backfill scheduler, model
5//! acquisition, configuration surfaces, capability reporting) implement against
6//! the types and constants defined here rather than guessing or hardcoding their
7//! own values.
8//!
9//! # Product contract
10//!
11//! Ordinary search **always works lexically**.  Semantic quality improves
12//! opportunistically: when model files are present, vectors are built in the
13//! background and hybrid results are blended in.  A missing or broken semantic
14//! tier never blocks or degrades lexical search.
15//!
16//! # Precedence (lowest to highest)
17//!
18//! 1. **Compiled defaults** — [`SemanticPolicy::compiled_defaults`]
19//! 2. **Persisted config** — `~/.config/cass/semantic.toml` (planned)
20//! 3. **Environment variables** — `CASS_SEMANTIC_*`
21//! 4. **CLI flags** — `--semantic-mode`, `--semantic-budget-mb`, etc.
22//!
23//! Higher layers override lower layers field-by-field; unset fields inherit.
24//!
25//! # Behaviour modes
26//!
27//! | Mode | Lexical | Fast-tier semantic | Quality-tier semantic |
28//! |------|---------|--------------------|----------------------|
29//! | `HybridPreferred` (default) | always | if available | if model present |
30//! | `LexicalOnly` | always | never | never |
31//! | `StrictSemantic` | always (floor) | required | required |
32//!
33//! `StrictSemantic` is for callers that want hard guarantees about semantic
34//! quality (e.g., bake-off).  It is never the default.
35//!
36//! # Storage budget
37//!
38//! Semantic artifacts are **derivative** — they can always be rebuilt from the
39//! canonical SQLite database.  They must never crowd out the DB or the required
40//! lexical index.
41//!
42//! Eviction order (first to go → last to go):
43//! 1. HNSW accelerator indices (`.chsw`)
44//! 2. Quality-tier vector index (`.fsvi`)
45//! 3. Fast-tier vector index
46//! 4. Downloaded model files
47//!
48//! The lexical index and SQLite DB are **never** evicted.
49
50use std::fmt;
51
52use serde::{Deserialize, Serialize};
53
54// ─── Behaviour mode ────────────────────────────────────────────────────────
55
/// How aggressively cass pursues semantic search.
///
/// Serialized in `snake_case` (for example `hybrid_preferred`), which is the
/// same spelling [`SemanticMode::as_str`] returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum SemanticMode {
    /// Default.  Lexical always works; semantic is blended in when available.
    #[default]
    HybridPreferred,
    /// Lexical only — never build or consult semantic assets.
    LexicalOnly,
    /// Both tiers required.  Errors if semantic is unavailable.
    StrictSemantic,
}
68
69impl fmt::Display for SemanticMode {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        f.write_str(self.as_str())
72    }
73}
74
75impl SemanticMode {
76    pub fn as_str(self) -> &'static str {
77        match self {
78            Self::HybridPreferred => "hybrid_preferred",
79            Self::LexicalOnly => "lexical_only",
80            Self::StrictSemantic => "strict_semantic",
81        }
82    }
83
84    /// Parse from a user-provided string (env, CLI, config).
85    pub fn parse(s: &str) -> Option<Self> {
86        match s.trim().to_ascii_lowercase().replace('-', "_").as_str() {
87            "hybrid_preferred" | "hybrid" | "default" | "auto" => Some(Self::HybridPreferred),
88            "lexical_only" | "lexical" | "lex" | "off" => Some(Self::LexicalOnly),
89            "strict_semantic" | "strict" | "semantic" => Some(Self::StrictSemantic),
90            _ => None,
91        }
92    }
93
94    /// Whether semantic assets should be built at all.
95    pub fn should_build_semantic(&self) -> bool {
96        !matches!(self, Self::LexicalOnly)
97    }
98
99    /// Whether search should fail if semantic is unavailable.
100    pub fn requires_semantic(&self) -> bool {
101        matches!(self, Self::StrictSemantic)
102    }
103}
104
105// ─── Model download policy ─────────────────────────────────────────────────
106
/// Whether model downloads are automatic, opt-in, or budget-gated.
///
/// Serialized in `snake_case` (for example `budget_gated`), which is the same
/// spelling [`ModelDownloadPolicy::as_str`] returns.  The default is `OptIn`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelDownloadPolicy {
    /// Never download automatically; user must explicitly request.
    #[default]
    OptIn,
    /// Download if disk budget allows and user has consented once.
    BudgetGated,
    /// Download automatically when needed (not recommended for constrained machines).
    Automatic,
}
119
120impl fmt::Display for ModelDownloadPolicy {
121    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
122        f.write_str(self.as_str())
123    }
124}
125
126impl ModelDownloadPolicy {
127    pub fn as_str(self) -> &'static str {
128        match self {
129            Self::OptIn => "opt_in",
130            Self::BudgetGated => "budget_gated",
131            Self::Automatic => "automatic",
132        }
133    }
134
135    pub fn parse(s: &str) -> Option<Self> {
136        match s.trim().to_ascii_lowercase().replace('-', "_").as_str() {
137            "opt_in" | "optin" | "manual" => Some(Self::OptIn),
138            "budget_gated" | "budget" | "gated" => Some(Self::BudgetGated),
139            "automatic" | "auto" => Some(Self::Automatic),
140            _ => None,
141        }
142    }
143}
144
145// ─── Tier identifiers ──────────────────────────────────────────────────────
146
/// Default fast-tier embedder name (always available, no model files).
///
/// Nothing in this module's env or CLI layers overrides the fast-tier name.
pub const DEFAULT_FAST_TIER_EMBEDDER: &str = "hash";

/// Default quality-tier embedder name (requires ML model files).
///
/// Overridable via `CASS_SEMANTIC_EMBEDDER` or the CLI quality-tier embedder
/// override.
pub const DEFAULT_QUALITY_TIER_EMBEDDER: &str = "minilm";

/// Default reranker name (requires cross-encoder model files).
///
/// Nothing in this module's env or CLI layers overrides the reranker name.
pub const DEFAULT_RERANKER: &str = "ms-marco-minilm";
155
156// ─── Dimension defaults ────────────────────────────────────────────────────
157
/// Fast-tier embedding dimension (hash embedder).
///
/// Overridable via `CASS_TWO_TIER_FAST_DIM`.
pub const DEFAULT_FAST_DIMENSION: usize = 256;

/// Quality-tier embedding dimension (MiniLM).
///
/// Overridable via `CASS_TWO_TIER_QUALITY_DIM`.
pub const DEFAULT_QUALITY_DIMENSION: usize = 384;

/// Quality-tier score weight when blending (0.0-1.0).
///
/// Overridable via `CASS_TWO_TIER_QUALITY_WEIGHT`; the env value is clamped
/// to the 0.0-1.0 range.
pub const DEFAULT_QUALITY_WEIGHT: f32 = 0.7;

/// Maximum documents to refine via quality tier per query.
///
/// Overridable via `CASS_TWO_TIER_MAX_REFINEMENT`.
pub const DEFAULT_MAX_REFINEMENT_DOCS: usize = 100;
169
170// ─── Storage budget defaults ───────────────────────────────────────────────
171
/// Default total semantic disk budget in megabytes.
///
/// This covers model files + vector indices + HNSW accelerators.
/// 500 MB is generous for a personal archive (MiniLM ≈ 90 MB, vectors
/// scale ~1.5 KB per 1000 messages at f16).  For 100 K messages the
/// vector index is ~150 KB — the models dominate.
///
/// NOTE(review): the sizing figures above look inconsistent with the
/// dimension defaults — a single 384-dimension f16 vector is already
/// 384 × 2 = 768 bytes, so "~1.5 KB per 1000 messages" may have been meant
/// as "per message".  Confirm against a real index before relying on it.
///
/// Overridable via `CASS_SEMANTIC_BUDGET_MB` or the CLI budget override.
pub const DEFAULT_SEMANTIC_BUDGET_MB: u64 = 500;

/// Minimum free disk space (MB) that must remain after semantic writes.
///
/// If semantic writes would leave less than this on the volume, they are
/// skipped.  This protects the canonical DB, lexical index, and OS.
///
/// Overridable via `CASS_SEMANTIC_MIN_FREE_DISK_MB`.
pub const MIN_FREE_DISK_MB: u64 = 200;

/// Model files are the biggest single cost.  Cap per-model.
///
/// Value is in megabytes.  Overridable via `CASS_SEMANTIC_MAX_MODEL_SIZE_MB`.
pub const MAX_MODEL_SIZE_MB: u64 = 300;
188
189// ─── Background scheduler budgets ──────────────────────────────────────────
190
/// Maximum CPU cores the background backfill worker may saturate.
/// On a typical 4-core dev laptop this is ~25 %.
///
/// Overridable via `CASS_SEMANTIC_MAX_BACKFILL_THREADS` or the CLI backfill
/// threads override.
pub const DEFAULT_MAX_BACKFILL_THREADS: usize = 1;

/// Maximum RSS the backfill worker should target (MB).
/// This is advisory — the embedder ONNX runtime is the main consumer.
///
/// Overridable via `CASS_SEMANTIC_MAX_BACKFILL_RSS_MB`.
pub const DEFAULT_MAX_BACKFILL_RSS_MB: u64 = 256;

/// How long (seconds) the scheduler waits after last user activity before
/// starting background work.  This prevents contention during interactive
/// search or indexing.
///
/// Overridable via `CASS_SEMANTIC_IDLE_DELAY_SECONDS`.
pub const DEFAULT_IDLE_DELAY_SECONDS: u64 = 30;

/// Maximum wall-clock seconds for a single background work chunk.
/// The scheduler yields after this to re-check budgets and user activity.
///
/// Overridable via `CASS_SEMANTIC_CHUNK_TIMEOUT_SECONDS`.
pub const DEFAULT_CHUNK_TIMEOUT_SECONDS: u64 = 120;
207
208// ─── Invalidation / upgrade constants ──────────────────────────────────────
209
/// Semantic schema version.  Bump when the vector document ID encoding,
/// quantization format, or normalization changes.  A version mismatch
/// forces a full vector rebuild.
pub const SEMANTIC_SCHEMA_VERSION: u32 = 1;

/// Chunking strategy version.
///
/// Changing the chunking strategy (e.g., max tokens per chunk, overlap)
/// invalidates all existing vectors even if the model is unchanged.
pub const CHUNKING_STRATEGY_VERSION: u32 = 1;
218
219// ─── The policy struct ─────────────────────────────────────────────────────
220
/// Resolved semantic policy after layering defaults → config → env → CLI.
///
/// Every field has a value — the resolution process fills in defaults for
/// anything not specified by higher layers.
///
/// Only `PartialEq` is derived (not `Eq`) because `quality_weight` is an `f32`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SemanticPolicy {
    // ── Behaviour ──────────────────────────────────────────────────────
    /// Active semantic mode.
    pub mode: SemanticMode,

    /// Whether model downloads may happen automatically.
    pub download_policy: ModelDownloadPolicy,

    // ── Model selection ────────────────────────────────────────────────
    /// Fast-tier embedder name (e.g., "hash").
    pub fast_tier_embedder: String,

    /// Quality-tier embedder name (e.g., "minilm").
    pub quality_tier_embedder: String,

    /// Reranker name (e.g., "ms-marco-minilm").
    pub reranker: String,

    // ── Dimensions / weights ───────────────────────────────────────────
    /// Fast-tier embedding dimension.
    pub fast_dimension: usize,

    /// Quality-tier embedding dimension.
    pub quality_dimension: usize,

    /// Quality weight for score blending (0.0–1.0).
    pub quality_weight: f32,

    /// Maximum documents refined per query.
    pub max_refinement_docs: usize,

    // ── Storage budget ─────────────────────────────────────────────────
    /// Total disk budget for all semantic artifacts (MB).
    pub semantic_budget_mb: u64,

    /// Minimum free disk that must remain after writes (MB).
    pub min_free_disk_mb: u64,

    /// Maximum single model size (MB).
    pub max_model_size_mb: u64,

    // ── Background scheduler ───────────────────────────────────────────
    /// Max threads for background backfill.
    pub max_backfill_threads: usize,

    /// Max RSS target for backfill worker (MB).
    pub max_backfill_rss_mb: u64,

    /// Idle delay before background work starts (seconds).
    pub idle_delay_seconds: u64,

    /// Max seconds per background work chunk.
    pub chunk_timeout_seconds: u64,

    // ── Versioning ─────────────────────────────────────────────────────
    /// Semantic schema version — mismatch forces rebuild.
    pub semantic_schema_version: u32,

    /// Chunking strategy version — mismatch forces rebuild.
    pub chunking_strategy_version: u32,
}
287
288impl Default for SemanticPolicy {
289    fn default() -> Self {
290        Self::compiled_defaults()
291    }
292}
293
294impl SemanticPolicy {
295    /// Compiled defaults — lowest precedence.
296    pub fn compiled_defaults() -> Self {
297        Self {
298            mode: SemanticMode::default(),
299            download_policy: ModelDownloadPolicy::default(),
300            fast_tier_embedder: DEFAULT_FAST_TIER_EMBEDDER.to_owned(),
301            quality_tier_embedder: DEFAULT_QUALITY_TIER_EMBEDDER.to_owned(),
302            reranker: DEFAULT_RERANKER.to_owned(),
303            fast_dimension: DEFAULT_FAST_DIMENSION,
304            quality_dimension: DEFAULT_QUALITY_DIMENSION,
305            quality_weight: DEFAULT_QUALITY_WEIGHT,
306            max_refinement_docs: DEFAULT_MAX_REFINEMENT_DOCS,
307            semantic_budget_mb: DEFAULT_SEMANTIC_BUDGET_MB,
308            min_free_disk_mb: MIN_FREE_DISK_MB,
309            max_model_size_mb: MAX_MODEL_SIZE_MB,
310            max_backfill_threads: DEFAULT_MAX_BACKFILL_THREADS,
311            max_backfill_rss_mb: DEFAULT_MAX_BACKFILL_RSS_MB,
312            idle_delay_seconds: DEFAULT_IDLE_DELAY_SECONDS,
313            chunk_timeout_seconds: DEFAULT_CHUNK_TIMEOUT_SECONDS,
314            semantic_schema_version: SEMANTIC_SCHEMA_VERSION,
315            chunking_strategy_version: CHUNKING_STRATEGY_VERSION,
316        }
317    }
318
319    fn with_env_lookup(mut self, mut lookup: impl FnMut(&str) -> Option<String>) -> Self {
320        if let Some(val) = lookup("CASS_SEMANTIC_MODE")
321            && let Some(mode) = SemanticMode::parse(&val)
322        {
323            self.mode = mode;
324        }
325
326        // Legacy alias: CASS_SEMANTIC_EMBEDDER=hash → LexicalOnly is wrong,
327        // it means "use hash as fast tier and skip quality".  We translate it
328        // into mode=HybridPreferred with hash as fast-tier (which is already
329        // the default).  The only actionable value is "hash" which forces
330        // HashFallback behaviour.
331        if let Some(val) = lookup("CASS_SEMANTIC_EMBEDDER") {
332            match val.trim().to_ascii_lowercase().as_str() {
333                "hash" => {
334                    // User explicitly wants hash-only — disable quality tier
335                    // but keep the mode hybrid-preferred so lexical still works.
336                    self.quality_tier_embedder = "hash".to_owned();
337                }
338                other => {
339                    // Treat as quality-tier embedder name override.
340                    self.quality_tier_embedder = other.to_owned();
341                }
342            }
343        }
344
345        if let Some(val) = lookup("CASS_SEMANTIC_DOWNLOAD_POLICY")
346            && let Some(policy) = ModelDownloadPolicy::parse(&val)
347        {
348            self.download_policy = policy;
349        }
350
351        if let Some(val) = lookup("CASS_SEMANTIC_BUDGET_MB")
352            && let Ok(mb) = val.trim().parse::<u64>()
353        {
354            self.semantic_budget_mb = mb;
355        }
356
357        if let Some(val) = lookup("CASS_SEMANTIC_MIN_FREE_DISK_MB")
358            && let Ok(mb) = val.trim().parse::<u64>()
359        {
360            self.min_free_disk_mb = mb;
361        }
362
363        if let Some(val) = lookup("CASS_SEMANTIC_MAX_MODEL_SIZE_MB")
364            && let Ok(mb) = val.trim().parse::<u64>()
365        {
366            self.max_model_size_mb = mb;
367        }
368
369        // Two-tier overrides (these already exist; we subsume them here for
370        // single-point resolution).
371        if let Some(val) = lookup("CASS_TWO_TIER_FAST_DIM")
372            && let Ok(dim) = val.trim().parse()
373        {
374            self.fast_dimension = dim;
375        }
376
377        if let Some(val) = lookup("CASS_TWO_TIER_QUALITY_DIM")
378            && let Ok(dim) = val.trim().parse()
379        {
380            self.quality_dimension = dim;
381        }
382
383        if let Some(val) = lookup("CASS_TWO_TIER_QUALITY_WEIGHT")
384            && let Ok(w) = val.trim().parse::<f32>()
385        {
386            self.quality_weight = w.clamp(0.0, 1.0);
387        }
388
389        if let Some(val) = lookup("CASS_TWO_TIER_MAX_REFINEMENT")
390            && let Ok(max) = val.trim().parse()
391        {
392            self.max_refinement_docs = max;
393        }
394
395        if let Some(val) = lookup("CASS_SEMANTIC_MAX_BACKFILL_THREADS")
396            && let Ok(n) = val.trim().parse()
397        {
398            self.max_backfill_threads = n;
399        }
400
401        if let Some(val) = lookup("CASS_SEMANTIC_MAX_BACKFILL_RSS_MB")
402            && let Ok(mb) = val.trim().parse()
403        {
404            self.max_backfill_rss_mb = mb;
405        }
406
407        if let Some(val) = lookup("CASS_SEMANTIC_IDLE_DELAY_SECONDS")
408            && let Ok(s) = val.trim().parse()
409        {
410            self.idle_delay_seconds = s;
411        }
412
413        if let Some(val) = lookup("CASS_SEMANTIC_CHUNK_TIMEOUT_SECONDS")
414            && let Ok(s) = val.trim().parse()
415        {
416            self.chunk_timeout_seconds = s;
417        }
418
419        self
420    }
421
422    /// Layer environment variables over the current policy.
423    ///
424    /// Only overrides fields for which env vars are set and parseable.
425    pub fn with_env_overrides(self) -> Self {
426        self.with_env_lookup(|key| dotenvy::var(key).ok())
427    }
428
429    /// Layer explicit CLI overrides.
430    ///
431    /// Each `Option` is `Some` only when the user passed that flag.
432    pub fn with_cli_overrides(mut self, overrides: &CliSemanticOverrides) -> Self {
433        if let Some(mode) = overrides.mode {
434            self.mode = mode;
435        }
436        if let Some(budget) = overrides.semantic_budget_mb {
437            self.semantic_budget_mb = budget;
438        }
439        if let Some(ref embedder) = overrides.quality_tier_embedder {
440            self.quality_tier_embedder = embedder.clone();
441        }
442        if let Some(threads) = overrides.max_backfill_threads {
443            self.max_backfill_threads = threads;
444        }
445        self
446    }
447
448    /// Full resolution: compiled defaults → env → CLI.
449    pub fn resolve(cli: &CliSemanticOverrides) -> Self {
450        Self::compiled_defaults()
451            .with_env_overrides()
452            .with_cli_overrides(cli)
453    }
454}
455
/// CLI-level overrides — `None` means "inherit from lower layer".
///
/// The derived `Default` leaves every field `None`, i.e. no CLI overrides.
#[derive(Debug, Clone, Default)]
pub struct CliSemanticOverrides {
    /// Semantic mode override.
    pub mode: Option<SemanticMode>,
    /// Total semantic disk budget override (MB).
    pub semantic_budget_mb: Option<u64>,
    /// Quality-tier embedder name override.
    pub quality_tier_embedder: Option<String>,
    /// Background backfill thread-count override.
    pub max_backfill_threads: Option<usize>,
}
464
465// ─── Effective-setting introspection ───────────────────────────────────────
466
/// Where a configuration value came from.
///
/// Serialized in `snake_case` (for example `compiled_default`), which is the
/// same spelling [`SettingSource::as_str`] returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SettingSource {
    /// Compiled into the binary.
    CompiledDefault,
    /// Loaded from persisted config file.
    Config,
    /// Set via environment variable.
    Environment,
    /// Set via CLI flag.
    Cli,
}
480
481impl fmt::Display for SettingSource {
482    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
483        f.write_str(self.as_str())
484    }
485}
486
487impl SettingSource {
488    pub fn as_str(self) -> &'static str {
489        match self {
490            Self::CompiledDefault => "compiled_default",
491            Self::Config => "config",
492            Self::Environment => "environment",
493            Self::Cli => "cli",
494        }
495    }
496}
497
/// A single setting with its resolved value and provenance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffectiveSetting {
    /// Setting name; matches the corresponding `SemanticPolicy` field name.
    pub name: String,
    /// Resolved value rendered as a string.
    pub value: String,
    /// Which precedence layer supplied the value.
    pub source: SettingSource,
    /// The environment variable that could override this (if any).
    pub env_var: Option<String>,
}
507
/// Complete effective-settings report for `cass status --json`.
///
/// **Known limitation**: Provenance detection compares resolved values, not
/// whether an env var was _set_.  If an env var is set to the same value as the
/// compiled default, the reported source will be `CompiledDefault` rather than
/// `Environment`.  The effective value is always correct regardless.
///
/// CLI provenance is different: a field is reported as `Cli` whenever the
/// corresponding CLI override is `Some`, even if its value equals the default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffectiveSettings {
    /// One entry per reported setting, in a fixed order.
    pub settings: Vec<EffectiveSetting>,
}
518
519fn compiled_default_setting(name: &str, value: impl Into<String>) -> EffectiveSetting {
520    EffectiveSetting {
521        name: name.to_owned(),
522        value: value.into(),
523        source: SettingSource::CompiledDefault,
524        env_var: None,
525    }
526}
527
528impl EffectiveSettings {
529    fn resolve_with_env_lookup(
530        cli: &CliSemanticOverrides,
531        lookup: impl FnMut(&str) -> Option<String>,
532    ) -> Self {
533        let defaults = SemanticPolicy::compiled_defaults();
534        let env_policy = defaults.clone().with_env_lookup(lookup);
535        let final_policy = env_policy.clone().with_cli_overrides(cli);
536
537        let mut settings = Vec::new();
538
539        // Helper: determine source for a field by comparing layers.
540        macro_rules! track {
541            ($name:expr, $field:ident, $env_var:expr, $cli_field:ident) => {
542                let source = if cli.$cli_field.is_some() {
543                    SettingSource::Cli
544                } else if env_policy.$field != defaults.$field {
545                    SettingSource::Environment
546                } else {
547                    SettingSource::CompiledDefault
548                };
549                settings.push(EffectiveSetting {
550                    name: $name.to_owned(),
551                    value: format!("{}", final_policy.$field),
552                    source,
553                    env_var: Some($env_var.to_owned()),
554                });
555            };
556        }
557
558        // Mode
559        track!("mode", mode, "CASS_SEMANTIC_MODE", mode);
560
561        // Budget
562        track!(
563            "semantic_budget_mb",
564            semantic_budget_mb,
565            "CASS_SEMANTIC_BUDGET_MB",
566            semantic_budget_mb
567        );
568
569        // Quality tier embedder
570        track!(
571            "quality_tier_embedder",
572            quality_tier_embedder,
573            "CASS_SEMANTIC_EMBEDDER",
574            quality_tier_embedder
575        );
576
577        // Backfill threads
578        track!(
579            "max_backfill_threads",
580            max_backfill_threads,
581            "CASS_SEMANTIC_MAX_BACKFILL_THREADS",
582            max_backfill_threads
583        );
584
585        // Fields without CLI overrides — only env vs default.
586        // Note: fast_tier_embedder and reranker have no env var overrides.
587        settings.push(compiled_default_setting(
588            "fast_tier_embedder",
589            final_policy.fast_tier_embedder.clone(),
590        ));
591        settings.push(compiled_default_setting(
592            "reranker",
593            final_policy.reranker.clone(),
594        ));
595
596        type EnvOnlyFieldGetter = fn(&SemanticPolicy) -> String;
597        type EnvOnlyField<'a> = (&'a str, &'a str, EnvOnlyFieldGetter);
598
599        let env_only_fields: &[EnvOnlyField<'_>] = &[
600            ("fast_dimension", "CASS_TWO_TIER_FAST_DIM", |p| {
601                p.fast_dimension.to_string()
602            }),
603            ("quality_dimension", "CASS_TWO_TIER_QUALITY_DIM", |p| {
604                p.quality_dimension.to_string()
605            }),
606            ("quality_weight", "CASS_TWO_TIER_QUALITY_WEIGHT", |p| {
607                format!("{}", p.quality_weight)
608            }),
609            ("max_refinement_docs", "CASS_TWO_TIER_MAX_REFINEMENT", |p| {
610                p.max_refinement_docs.to_string()
611            }),
612            ("min_free_disk_mb", "CASS_SEMANTIC_MIN_FREE_DISK_MB", |p| {
613                p.min_free_disk_mb.to_string()
614            }),
615            (
616                "max_model_size_mb",
617                "CASS_SEMANTIC_MAX_MODEL_SIZE_MB",
618                |p| p.max_model_size_mb.to_string(),
619            ),
620            ("download_policy", "CASS_SEMANTIC_DOWNLOAD_POLICY", |p| {
621                p.download_policy.to_string()
622            }),
623            (
624                "idle_delay_seconds",
625                "CASS_SEMANTIC_IDLE_DELAY_SECONDS",
626                |p| p.idle_delay_seconds.to_string(),
627            ),
628            (
629                "chunk_timeout_seconds",
630                "CASS_SEMANTIC_CHUNK_TIMEOUT_SECONDS",
631                |p| p.chunk_timeout_seconds.to_string(),
632            ),
633            (
634                "max_backfill_rss_mb",
635                "CASS_SEMANTIC_MAX_BACKFILL_RSS_MB",
636                |p| p.max_backfill_rss_mb.to_string(),
637            ),
638        ];
639
640        for (name, env_var, getter) in env_only_fields {
641            let default_val = getter(&defaults);
642            let env_val = getter(&env_policy);
643            let source = if env_val != default_val {
644                SettingSource::Environment
645            } else {
646                SettingSource::CompiledDefault
647            };
648            settings.push(EffectiveSetting {
649                name: name.to_string(),
650                value: getter(&final_policy),
651                source,
652                env_var: Some(env_var.to_string()),
653            });
654        }
655
656        // Version fields (always compiled default).
657        settings.push(compiled_default_setting(
658            "semantic_schema_version",
659            final_policy.semantic_schema_version.to_string(),
660        ));
661        settings.push(compiled_default_setting(
662            "chunking_strategy_version",
663            final_policy.chunking_strategy_version.to_string(),
664        ));
665
666        Self { settings }
667    }
668
669    /// Build the effective-settings report by resolving each field with
670    /// full provenance tracking.
671    pub fn resolve(cli: &CliSemanticOverrides) -> Self {
672        Self::resolve_with_env_lookup(cli, |key| dotenvy::var(key).ok())
673    }
674
675    /// Find a setting by name.
676    pub fn get(&self, name: &str) -> Option<&EffectiveSetting> {
677        self.settings.iter().find(|s| s.name == name)
678    }
679
680    /// Count settings from each source.
681    pub fn source_counts(&self) -> std::collections::HashMap<SettingSource, usize> {
682        let mut counts = std::collections::HashMap::new();
683        for s in &self.settings {
684            *counts.entry(s.source).or_insert(0) += 1;
685        }
686        counts
687    }
688}
689
690// ─── Capability classification ─────────────────────────────────────────────
691
/// What semantic quality level is achievable on this machine right now.
///
/// Serialized in `snake_case`.  Only `Degraded` carries data (the reason).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SemanticCapability {
    /// Full quality: ML model present, vector index built, HNSW available.
    FullQuality,
    /// Quality tier available but HNSW accelerator missing (brute-force OK).
    QualityNoHnsw,
    /// Only fast-tier (hash) semantic — no ML model installed.
    FastTierOnly,
    /// No semantic capability — mode is lexical-only.
    LexicalOnly,
    /// Semantic is desired but broken (model corrupt, load failed, etc.).
    Degraded { reason: String },
}
707
708impl SemanticCapability {
709    /// Whether any semantic search is possible.
710    pub fn can_search_semantic(&self) -> bool {
711        matches!(
712            self,
713            Self::FullQuality | Self::QualityNoHnsw | Self::FastTierOnly
714        )
715    }
716
717    /// Whether quality-tier (ML) search is possible.
718    pub fn has_quality_tier(&self) -> bool {
719        matches!(self, Self::FullQuality | Self::QualityNoHnsw)
720    }
721
722    /// Short label for TUI/robot status.
723    pub fn status_label(&self) -> &'static str {
724        match self {
725            Self::FullQuality => "SEM+",
726            Self::QualityNoHnsw => "SEM",
727            Self::FastTierOnly => "SEM*",
728            Self::LexicalOnly => "LEX",
729            Self::Degraded { .. } => "ERR",
730        }
731    }
732
733    /// Human-readable summary for `cass status --json`.
734    pub fn summary(&self) -> String {
735        match self {
736            Self::FullQuality => {
737                "Full semantic: ML embedder + vector index + HNSW accelerator".to_owned()
738            }
739            Self::QualityNoHnsw => {
740                "Quality semantic: ML embedder + vector index (brute-force)".to_owned()
741            }
742            Self::FastTierOnly => {
743                "Fast semantic: hash embedder only (install ML model for quality)".to_owned()
744            }
745            Self::LexicalOnly => "Lexical only: semantic search disabled by policy".to_owned(),
746            Self::Degraded { reason } => format!("Degraded: {reason}"),
747        }
748    }
749}
750
751// ─── Invalidation decisions ────────────────────────────────────────────────
752
/// What happened and what to do about existing semantic assets.
///
/// Produced by [`SemanticAssetManifest::invalidation_action`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InvalidationAction {
    /// Assets are current — nothing to do.
    UpToDate,
    /// Vectors are stale but usable until rebuild completes.
    RebuildInBackground,
    /// Vectors are from an incompatible schema — must discard and rebuild.
    ///
    /// `reason` is a human-readable description of what changed.
    DiscardAndRebuild { reason: String },
    /// Assets should be removed entirely (mode changed to lexical-only).
    Evict,
}
766
/// Metadata stored alongside semantic assets to detect invalidation.
///
/// The embedder ID, model revision, and the two version fields are compared
/// against the current policy by `invalidation_action`; `doc_count` and
/// `built_at_ms` are not consulted by that check.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SemanticAssetManifest {
    /// Embedder ID that produced these vectors (e.g., "minilm-384").
    pub embedder_id: String,
    /// HuggingFace revision hash of the model checkpoint.
    pub model_revision: String,
    /// Semantic schema version at build time.
    pub schema_version: u32,
    /// Chunking strategy version at build time.
    pub chunking_version: u32,
    /// Number of documents embedded.
    pub doc_count: u64,
    /// Unix timestamp (ms) of last build.
    pub built_at_ms: i64,
}
783
784impl SemanticAssetManifest {
785    /// Decide what to do given the current policy, expected embedder ID, and
786    /// the model revision currently installed.
787    ///
788    /// `expected_embedder_id` should be the full embedder ID for the tier this
789    /// manifest belongs to (e.g., `"fnv1a-384"` for fast, `"minilm-384"` for
790    /// quality).
791    pub fn invalidation_action(
792        &self,
793        policy: &SemanticPolicy,
794        current_model_revision: &str,
795        expected_embedder_id: &str,
796    ) -> InvalidationAction {
797        // Mode changed to lexical-only → evict everything.
798        if !policy.mode.should_build_semantic() {
799            return InvalidationAction::Evict;
800        }
801
802        // Schema version mismatch → hard rebuild (encoding changed).
803        if self.schema_version != policy.semantic_schema_version {
804            return InvalidationAction::DiscardAndRebuild {
805                reason: format!(
806                    "semantic schema version changed ({} → {})",
807                    self.schema_version, policy.semantic_schema_version
808                ),
809            };
810        }
811
812        // Chunking strategy changed → hard rebuild (segments differ).
813        if self.chunking_version != policy.chunking_strategy_version {
814            return InvalidationAction::DiscardAndRebuild {
815                reason: format!(
816                    "chunking strategy version changed ({} → {})",
817                    self.chunking_version, policy.chunking_strategy_version
818                ),
819            };
820        }
821
822        // Embedder ID changed entirely (e.g., minilm → snowflake) → hard
823        // rebuild because dimensions or encoding may differ.  This MUST be
824        // checked before model revision: an embedder change means the vectors
825        // are in a completely different space and cannot serve as interim results.
826        if self.embedder_id != expected_embedder_id {
827            return InvalidationAction::DiscardAndRebuild {
828                reason: format!(
829                    "embedder changed ({} → {})",
830                    self.embedder_id, expected_embedder_id
831                ),
832            };
833        }
834
835        // Model revision changed (same embedder) → soft rebuild.  Old vectors
836        // are in the same space and usable until rebuild completes.
837        if self.model_revision != current_model_revision {
838            return InvalidationAction::RebuildInBackground;
839        }
840
841        InvalidationAction::UpToDate
842    }
843}
844
// ─── Budget decisions ──────────────────────────────────────────────────────
846
847/// Result of a disk-budget check.
848#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
849#[serde(rename_all = "snake_case")]
850pub enum BudgetDecision {
851    /// Plenty of room — proceed.
852    Allowed,
853    /// Would exceed the semantic budget but free disk is fine — warn.
854    OverBudgetWarn { used_mb: u64, budget_mb: u64 },
855    /// Would leave less than min_free_disk_mb — deny.
856    DiskPressureDeny { free_mb: u64, min_required_mb: u64 },
857    /// Model too large for per-model cap — deny.
858    ModelTooLarge { model_mb: u64, max_mb: u64 },
859}
860
861impl BudgetDecision {
862    pub fn is_allowed(&self) -> bool {
863        matches!(self, Self::Allowed | Self::OverBudgetWarn { .. })
864    }
865}
866
867impl SemanticPolicy {
868    /// Check whether a proposed write of `write_size_mb` is within budget.
869    ///
870    /// This is intended for **model downloads** — the first check compares
871    /// against `max_model_size_mb`.  For vector index writes (which are much
872    /// smaller), prefer skipping the per-model cap or calling with a separate
873    /// budget method when one is needed.
874    ///
875    /// `current_semantic_usage_mb` is the total disk used by semantic artifacts
876    /// right now.  `free_disk_mb` is the free space on the volume.
877    pub fn check_budget(
878        &self,
879        write_size_mb: u64,
880        current_semantic_usage_mb: u64,
881        free_disk_mb: u64,
882    ) -> BudgetDecision {
883        // Per-model cap.
884        if write_size_mb > self.max_model_size_mb {
885            return BudgetDecision::ModelTooLarge {
886                model_mb: write_size_mb,
887                max_mb: self.max_model_size_mb,
888            };
889        }
890
891        // Free disk floor.
892        if free_disk_mb.saturating_sub(write_size_mb) < self.min_free_disk_mb {
893            return BudgetDecision::DiskPressureDeny {
894                free_mb: free_disk_mb,
895                min_required_mb: self.min_free_disk_mb,
896            };
897        }
898
899        // Total semantic budget.
900        let new_total = current_semantic_usage_mb.saturating_add(write_size_mb);
901        if new_total > self.semantic_budget_mb {
902            return BudgetDecision::OverBudgetWarn {
903                used_mb: new_total,
904                budget_mb: self.semantic_budget_mb,
905            };
906        }
907
908        BudgetDecision::Allowed
909    }
910}
911
// ─── Robot-friendly capability payload ─────────────────────────────────────
913
914/// JSON-serializable capability snapshot for `cass status --json`.
915#[derive(Debug, Clone, Serialize, Deserialize)]
916pub struct SemanticCapabilityReport {
917    pub mode: SemanticMode,
918    pub capability: SemanticCapability,
919    pub fast_tier_embedder: String,
920    pub quality_tier_embedder: String,
921    pub reranker: String,
922    pub fast_dimension: usize,
923    pub quality_dimension: usize,
924    pub quality_weight: f32,
925    pub semantic_budget_mb: u64,
926    pub current_usage_mb: u64,
927    pub download_policy: ModelDownloadPolicy,
928    pub semantic_schema_version: u32,
929    pub chunking_strategy_version: u32,
930    pub summary: String,
931}
932
933impl SemanticCapabilityReport {
934    /// Build a report from a resolved policy and observed capability.
935    pub fn from_policy(
936        policy: &SemanticPolicy,
937        capability: SemanticCapability,
938        current_usage_mb: u64,
939    ) -> Self {
940        let summary = capability.summary();
941        Self {
942            mode: policy.mode,
943            capability,
944            fast_tier_embedder: policy.fast_tier_embedder.clone(),
945            quality_tier_embedder: policy.quality_tier_embedder.clone(),
946            reranker: policy.reranker.clone(),
947            fast_dimension: policy.fast_dimension,
948            quality_dimension: policy.quality_dimension,
949            quality_weight: policy.quality_weight,
950            semantic_budget_mb: policy.semantic_budget_mb,
951            current_usage_mb,
952            download_policy: policy.download_policy,
953            semantic_schema_version: policy.semantic_schema_version,
954            chunking_strategy_version: policy.chunking_strategy_version,
955            summary,
956        }
957    }
958}
959
// ─── Eviction order ────────────────────────────────────────────────────────
961
962/// Ordered list of semantic artifact categories, first-to-evict first.
963pub const EVICTION_ORDER: &[SemanticArtifactKind] = &[
964    SemanticArtifactKind::HnswAccelerator,
965    SemanticArtifactKind::QualityVectorIndex,
966    SemanticArtifactKind::FastVectorIndex,
967    SemanticArtifactKind::ModelFiles,
968];
969
970/// Categories of semantic artifacts for eviction / budget accounting.
971#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
972#[serde(rename_all = "snake_case")]
973pub enum SemanticArtifactKind {
974    HnswAccelerator,
975    QualityVectorIndex,
976    FastVectorIndex,
977    ModelFiles,
978}
979
980impl SemanticArtifactKind {
981    /// Whether this artifact is required for the given capability level.
982    pub fn required_for(&self, capability: &SemanticCapability) -> bool {
983        match (self, capability) {
984            (_, SemanticCapability::LexicalOnly) => false,
985            (Self::HnswAccelerator, _) => false, // always optional
986            (Self::ModelFiles, SemanticCapability::FastTierOnly) => false,
987            (Self::QualityVectorIndex, SemanticCapability::FastTierOnly) => false,
988            _ => true,
989        }
990    }
991}
992
// ─── Tests ─────────────────────────────────────────────────────────────────
994
#[cfg(test)]
mod tests {
    use super::*;

    // ── Precedence resolution ──────────────────────────────────────────

    // Pins every compiled default that downstream code relies on.
    #[test]
    fn compiled_defaults_are_hybrid_preferred() {
        let p = SemanticPolicy::compiled_defaults();
        assert_eq!(p.mode, SemanticMode::HybridPreferred);
        assert_eq!(p.fast_tier_embedder, "hash");
        assert_eq!(p.quality_tier_embedder, "minilm");
        assert_eq!(p.download_policy, ModelDownloadPolicy::OptIn);
        assert_eq!(p.fast_dimension, 256);
        assert_eq!(p.quality_dimension, 384);
        assert!((p.quality_weight - 0.7).abs() < f32::EPSILON);
        assert_eq!(p.max_refinement_docs, 100);
        assert_eq!(p.semantic_budget_mb, 500);
        assert_eq!(p.min_free_disk_mb, 200);
        assert_eq!(p.max_backfill_threads, 1);
        assert_eq!(p.semantic_schema_version, SEMANTIC_SCHEMA_VERSION);
        assert_eq!(p.chunking_strategy_version, CHUNKING_STRATEGY_VERSION);
    }

    // CLI overrides replace only the fields they set.
    #[test]
    fn cli_overrides_beat_defaults() {
        let cli = CliSemanticOverrides {
            mode: Some(SemanticMode::LexicalOnly),
            semantic_budget_mb: Some(100),
            quality_tier_embedder: Some("snowflake".to_owned()),
            max_backfill_threads: Some(4),
        };
        let p = SemanticPolicy::compiled_defaults().with_cli_overrides(&cli);
        assert_eq!(p.mode, SemanticMode::LexicalOnly);
        assert_eq!(p.semantic_budget_mb, 100);
        assert_eq!(p.quality_tier_embedder, "snowflake");
        assert_eq!(p.max_backfill_threads, 4);
        // Unset fields remain default.
        assert_eq!(p.fast_tier_embedder, "hash");
        assert_eq!(p.quality_dimension, 384);
    }

    #[test]
    fn cli_overrides_beat_env_overrides() {
        // Simulate env setting mode=lexical_only, then CLI overrides to strict.
        let mut p = SemanticPolicy::compiled_defaults();
        p.mode = SemanticMode::LexicalOnly; // as-if env set it
        let cli = CliSemanticOverrides {
            mode: Some(SemanticMode::StrictSemantic),
            ..Default::default()
        };
        let p = p.with_cli_overrides(&cli);
        assert_eq!(p.mode, SemanticMode::StrictSemantic);
    }

    // ── Semantic mode parsing (table-driven) ───────────────────────────

    #[test]
    fn semantic_mode_parsing() {
        let cases: &[(&str, Option<SemanticMode>)] = &[
            ("hybrid_preferred", Some(SemanticMode::HybridPreferred)),
            ("hybrid", Some(SemanticMode::HybridPreferred)),
            ("default", Some(SemanticMode::HybridPreferred)),
            ("auto", Some(SemanticMode::HybridPreferred)),
            ("HYBRID", Some(SemanticMode::HybridPreferred)),
            ("lexical_only", Some(SemanticMode::LexicalOnly)),
            ("lexical", Some(SemanticMode::LexicalOnly)),
            ("lex", Some(SemanticMode::LexicalOnly)),
            ("off", Some(SemanticMode::LexicalOnly)),
            ("strict_semantic", Some(SemanticMode::StrictSemantic)),
            ("strict", Some(SemanticMode::StrictSemantic)),
            ("semantic", Some(SemanticMode::StrictSemantic)),
            ("  Hybrid-Preferred  ", Some(SemanticMode::HybridPreferred)),
            ("nonsense", None),
            ("", None),
        ];
        for (input, expected) in cases {
            assert_eq!(
                SemanticMode::parse(input),
                *expected,
                "failed for input: {input:?}"
            );
        }
    }

    #[test]
    fn download_policy_parsing() {
        let cases: &[(&str, Option<ModelDownloadPolicy>)] = &[
            ("opt_in", Some(ModelDownloadPolicy::OptIn)),
            ("optin", Some(ModelDownloadPolicy::OptIn)),
            ("manual", Some(ModelDownloadPolicy::OptIn)),
            ("budget_gated", Some(ModelDownloadPolicy::BudgetGated)),
            ("budget", Some(ModelDownloadPolicy::BudgetGated)),
            ("gated", Some(ModelDownloadPolicy::BudgetGated)),
            ("automatic", Some(ModelDownloadPolicy::Automatic)),
            ("auto", Some(ModelDownloadPolicy::Automatic)),
            ("xyz", None),
        ];
        for (input, expected) in cases {
            assert_eq!(
                ModelDownloadPolicy::parse(input),
                *expected,
                "failed for input: {input:?}"
            );
        }
    }

    // `Display` and `as_str` must agree on the canonical snake_case spelling.
    #[test]
    fn display_spellings_delegate_to_as_str() {
        let semantic_modes = [
            (SemanticMode::HybridPreferred, "hybrid_preferred"),
            (SemanticMode::LexicalOnly, "lexical_only"),
            (SemanticMode::StrictSemantic, "strict_semantic"),
        ];
        for (mode, expected) in semantic_modes {
            assert_eq!(mode.as_str(), expected);
            assert_eq!(mode.to_string(), expected);
        }

        let download_policies = [
            (ModelDownloadPolicy::OptIn, "opt_in"),
            (ModelDownloadPolicy::BudgetGated, "budget_gated"),
            (ModelDownloadPolicy::Automatic, "automatic"),
        ];
        for (policy, expected) in download_policies {
            assert_eq!(policy.as_str(), expected);
            assert_eq!(policy.to_string(), expected);
        }

        let setting_sources = [
            (SettingSource::CompiledDefault, "compiled_default"),
            (SettingSource::Config, "config"),
            (SettingSource::Environment, "environment"),
            (SettingSource::Cli, "cli"),
        ];
        for (source, expected) in setting_sources {
            assert_eq!(source.as_str(), expected);
            assert_eq!(source.to_string(), expected);
        }
    }

    // ── Semantic mode behaviour flags ──────────────────────────────────

    #[test]
    fn mode_behaviour_flags() {
        let cases: &[(SemanticMode, bool, bool)] = &[
            // (mode, should_build_semantic, requires_semantic)
            (SemanticMode::HybridPreferred, true, false),
            (SemanticMode::LexicalOnly, false, false),
            (SemanticMode::StrictSemantic, true, true),
        ];
        for (mode, build, require) in cases {
            assert_eq!(
                mode.should_build_semantic(),
                *build,
                "should_build for {mode:?}"
            );
            assert_eq!(mode.requires_semantic(), *require, "requires for {mode:?}");
        }
    }

    // ── Capability classification ──────────────────────────────────────

    #[test]
    fn capability_classification() {
        let cases: &[(SemanticCapability, bool, bool, &str)] = &[
            // (capability, can_search, has_quality, label)
            (SemanticCapability::FullQuality, true, true, "SEM+"),
            (SemanticCapability::QualityNoHnsw, true, true, "SEM"),
            (SemanticCapability::FastTierOnly, true, false, "SEM*"),
            (SemanticCapability::LexicalOnly, false, false, "LEX"),
            (
                SemanticCapability::Degraded {
                    reason: "test".to_owned(),
                },
                false,
                false,
                "ERR",
            ),
        ];
        for (cap, can_search, has_quality, label) in cases {
            assert_eq!(
                cap.can_search_semantic(),
                *can_search,
                "can_search for {cap:?}"
            );
            assert_eq!(
                cap.has_quality_tier(),
                *has_quality,
                "has_quality for {cap:?}"
            );
            assert_eq!(cap.status_label(), *label, "label for {cap:?}");
        }
    }

    // ── Budget decisions (table-driven) ────────────────────────────────

    #[test]
    fn budget_decisions() {
        let p = SemanticPolicy::compiled_defaults();
        // defaults: budget=500, min_free=200, max_model=300

        let cases: &[(u64, u64, u64, BudgetDecision)] = &[
            // (write_mb, current_usage_mb, free_disk_mb, expected)
            //
            // Normal: 90 MB write, 100 used, 1000 free → allowed
            (90, 100, 1000, BudgetDecision::Allowed),
            // Over budget: 90 MB write, 450 used (total=540 > 500) → warn
            (
                90,
                450,
                1000,
                BudgetDecision::OverBudgetWarn {
                    used_mb: 540,
                    budget_mb: 500,
                },
            ),
            // Disk pressure: 90 MB write, 0 used, 250 free (250-90=160 < 200) → deny
            (
                90,
                0,
                250,
                BudgetDecision::DiskPressureDeny {
                    free_mb: 250,
                    min_required_mb: 200,
                },
            ),
            // Model too large: 350 MB > max_model 300 → deny
            (
                350,
                0,
                1000,
                BudgetDecision::ModelTooLarge {
                    model_mb: 350,
                    max_mb: 300,
                },
            ),
            // Edge: exact budget limit (90+410=500) → allowed
            (90, 410, 1000, BudgetDecision::Allowed),
            // Edge: 1 MB over budget → warn
            (
                91,
                410,
                1000,
                BudgetDecision::OverBudgetWarn {
                    used_mb: 501,
                    budget_mb: 500,
                },
            ),
            // Edge: exact free floor (free - write = min_free exactly)
            (90, 0, 290, BudgetDecision::Allowed),
            // Edge: 1 MB under free floor
            (
                90,
                0,
                289,
                BudgetDecision::DiskPressureDeny {
                    free_mb: 289,
                    min_required_mb: 200,
                },
            ),
            // Edge: write exactly at the per-model cap (300 == max_model) → allowed
            (300, 0, 1000, BudgetDecision::Allowed),
            // Edge: write larger than free disk (50-90 saturates to 0 < 200) → deny
            (
                90,
                0,
                50,
                BudgetDecision::DiskPressureDeny {
                    free_mb: 50,
                    min_required_mb: 200,
                },
            ),
            // Ordering: per-model cap is reported even when disk is also short
            (
                350,
                0,
                100,
                BudgetDecision::ModelTooLarge {
                    model_mb: 350,
                    max_mb: 300,
                },
            ),
            // Ordering: disk pressure is reported even when also over budget
            (
                90,
                450,
                250,
                BudgetDecision::DiskPressureDeny {
                    free_mb: 250,
                    min_required_mb: 200,
                },
            ),
        ];

        for (write, usage, free, expected) in cases {
            let got = p.check_budget(*write, *usage, *free);
            assert_eq!(
                got, *expected,
                "budget check failed for write={write}, usage={usage}, free={free}"
            );
        }
    }

    // A warning still lets the write proceed; both deny variants do not.
    #[test]
    fn budget_decision_is_allowed() {
        let cases: &[(BudgetDecision, bool)] = &[
            (BudgetDecision::Allowed, true),
            (
                BudgetDecision::OverBudgetWarn {
                    used_mb: 540,
                    budget_mb: 500,
                },
                true,
            ),
            (
                BudgetDecision::DiskPressureDeny {
                    free_mb: 250,
                    min_required_mb: 200,
                },
                false,
            ),
            (
                BudgetDecision::ModelTooLarge {
                    model_mb: 350,
                    max_mb: 300,
                },
                false,
            ),
        ];

        for (decision, expected) in cases {
            assert_eq!(
                decision.is_allowed(),
                *expected,
                "is_allowed for {decision:?}"
            );
        }
    }

    // ── Invalidation / upgrade decisions (table-driven) ────────────────

    #[test]
    fn invalidation_decisions() {
        let policy = SemanticPolicy::compiled_defaults();
        let expected_id = format!(
            "{}-{}",
            policy.quality_tier_embedder, policy.quality_dimension
        );

        let base_manifest = SemanticAssetManifest {
            embedder_id: expected_id.clone(),
            model_revision: "abc123".to_owned(),
            schema_version: SEMANTIC_SCHEMA_VERSION,
            chunking_version: CHUNKING_STRATEGY_VERSION,
            doc_count: 1000,
            built_at_ms: 1700000000000,
        };

        // Case 1: Everything matches → UpToDate
        assert_eq!(
            base_manifest.invalidation_action(&policy, "abc123", &expected_id),
            InvalidationAction::UpToDate,
        );

        // Case 2: Model revision changed → soft rebuild
        assert_eq!(
            base_manifest.invalidation_action(&policy, "def456", &expected_id),
            InvalidationAction::RebuildInBackground,
        );

        // Case 3: Schema version changed → hard rebuild
        {
            let mut m = base_manifest.clone();
            m.schema_version = 0;
            let action = m.invalidation_action(&policy, "abc123", &expected_id);
            assert!(matches!(
                action,
                InvalidationAction::DiscardAndRebuild { .. }
            ));
        }

        // Case 4: Chunking version changed → hard rebuild
        {
            let mut m = base_manifest.clone();
            m.chunking_version = 0;
            let action = m.invalidation_action(&policy, "abc123", &expected_id);
            assert!(matches!(
                action,
                InvalidationAction::DiscardAndRebuild { .. }
            ));
        }

        // Case 5: Embedder ID changed → hard rebuild
        {
            let mut m = base_manifest.clone();
            m.embedder_id = "snowflake-768".to_owned();
            let action = m.invalidation_action(&policy, "abc123", &expected_id);
            assert!(matches!(
                action,
                InvalidationAction::DiscardAndRebuild { .. }
            ));
        }

        // Case 6: Mode changed to lexical-only → evict
        {
            let mut lex_policy = policy.clone();
            lex_policy.mode = SemanticMode::LexicalOnly;
            assert_eq!(
                base_manifest.invalidation_action(&lex_policy, "abc123", &expected_id),
                InvalidationAction::Evict,
            );
        }

        // Case 7: Embedder AND model revision changed → the hard rebuild must
        // win; vectors from another embedder cannot serve as interim results.
        {
            let mut m = base_manifest.clone();
            m.embedder_id = "snowflake-768".to_owned();
            let action = m.invalidation_action(&policy, "def456", &expected_id);
            assert!(matches!(
                action,
                InvalidationAction::DiscardAndRebuild { .. }
            ));
        }

        // Case 8: Lexical-only mode outranks a schema mismatch → evict
        {
            let mut lex_policy = policy.clone();
            lex_policy.mode = SemanticMode::LexicalOnly;
            let mut m = base_manifest.clone();
            m.schema_version = 0;
            assert_eq!(
                m.invalidation_action(&lex_policy, "abc123", &expected_id),
                InvalidationAction::Evict,
            );
        }
    }

    // ── Eviction order ─────────────────────────────────────────────────

    #[test]
    fn eviction_order_hnsw_first_model_last() {
        assert_eq!(EVICTION_ORDER[0], SemanticArtifactKind::HnswAccelerator);
        assert_eq!(EVICTION_ORDER[1], SemanticArtifactKind::QualityVectorIndex);
        assert_eq!(EVICTION_ORDER[2], SemanticArtifactKind::FastVectorIndex);
        assert_eq!(EVICTION_ORDER[3], SemanticArtifactKind::ModelFiles);
    }

    #[test]
    fn artifact_required_for_capability() {
        use SemanticArtifactKind::*;
        use SemanticCapability::*;

        let cases: &[(SemanticArtifactKind, SemanticCapability, bool)] = &[
            // HNSW is never required
            (HnswAccelerator, FullQuality, false),
            (HnswAccelerator, QualityNoHnsw, false),
            (HnswAccelerator, FastTierOnly, false),
            (HnswAccelerator, LexicalOnly, false),
            // Nothing required for lexical-only
            (ModelFiles, LexicalOnly, false),
            (QualityVectorIndex, LexicalOnly, false),
            (FastVectorIndex, LexicalOnly, false),
            // FastTierOnly needs fast index but not model/quality
            (FastVectorIndex, FastTierOnly, true),
            (QualityVectorIndex, FastTierOnly, false),
            (ModelFiles, FastTierOnly, false),
            // FullQuality needs everything except HNSW
            (ModelFiles, FullQuality, true),
            (QualityVectorIndex, FullQuality, true),
            (FastVectorIndex, FullQuality, true),
            // QualityNoHnsw likewise needs everything except HNSW
            (ModelFiles, QualityNoHnsw, true),
            (QualityVectorIndex, QualityNoHnsw, true),
            (FastVectorIndex, QualityNoHnsw, true),
        ];

        for (artifact, cap, expected) in cases {
            assert_eq!(
                artifact.required_for(cap),
                *expected,
                "{artifact:?} required_for {cap:?}"
            );
        }
    }

    // ── Robot-friendly fixture payloads ─────────────────────────────────

    #[test]
    fn fixture_no_model_state() {
        let policy = SemanticPolicy::compiled_defaults();
        let cap = SemanticCapability::FastTierOnly;
        let report = SemanticCapabilityReport::from_policy(&policy, cap, 0);

        assert_eq!(report.mode, SemanticMode::HybridPreferred);
        assert!(report.summary.contains("hash embedder only"));
        assert_eq!(report.current_usage_mb, 0);

        // Verify serialization round-trips.
        let json = serde_json::to_string_pretty(&report).unwrap();
        let deser: SemanticCapabilityReport = serde_json::from_str(&json).unwrap();
        assert_eq!(deser.mode, report.mode);
        assert_eq!(deser.fast_tier_embedder, "hash");
    }

    #[test]
    fn fixture_fast_tier_only_state() {
        let policy = SemanticPolicy::compiled_defaults();
        let cap = SemanticCapability::FastTierOnly;
        let report = SemanticCapabilityReport::from_policy(&policy, cap, 0);

        assert_eq!(report.capability, SemanticCapability::FastTierOnly);
        assert_eq!(report.quality_tier_embedder, "minilm");
        assert_eq!(report.download_policy, ModelDownloadPolicy::OptIn);
    }

    #[test]
    fn fixture_full_quality_state() {
        let policy = SemanticPolicy::compiled_defaults();
        let cap = SemanticCapability::FullQuality;
        let report = SemanticCapabilityReport::from_policy(&policy, cap, 95);

        assert_eq!(report.capability, SemanticCapability::FullQuality);
        assert_eq!(report.current_usage_mb, 95);
        assert!(report.summary.contains("Full semantic"));

        let json = serde_json::to_string_pretty(&report).unwrap();
        let deser: SemanticCapabilityReport = serde_json::from_str(&json).unwrap();
        assert_eq!(deser.current_usage_mb, 95);
    }

    // ── Serialization round-trip ───────────────────────────────────────

    #[test]
    fn policy_json_round_trip() {
        let policy = SemanticPolicy::compiled_defaults();
        let json = serde_json::to_string(&policy).unwrap();
        let deser: SemanticPolicy = serde_json::from_str(&json).unwrap();
        assert_eq!(deser, policy);
    }

    #[test]
    fn asset_manifest_json_round_trip() {
        let manifest = SemanticAssetManifest {
            embedder_id: "minilm-384".to_owned(),
            model_revision: "abc123".to_owned(),
            schema_version: 1,
            chunking_version: 1,
            doc_count: 5000,
            built_at_ms: 1700000000000,
        };
        let json = serde_json::to_string(&manifest).unwrap();
        let deser: SemanticAssetManifest = serde_json::from_str(&json).unwrap();
        assert_eq!(deser, manifest);
    }

    // ── Effective-settings introspection ────────────────────────────────

    #[test]
    fn effective_settings_all_defaults() {
        let cli = CliSemanticOverrides::default();
        let settings = EffectiveSettings::resolve(&cli);

        // All settings should exist.
        assert!(settings.settings.len() >= 15);

        // All should be compiled defaults (no env or CLI set).
        for s in &settings.settings {
            assert_eq!(
                s.source,
                SettingSource::CompiledDefault,
                "setting '{}' should be CompiledDefault, got {:?}",
                s.name,
                s.source
            );
        }

        // Verify specific values.
        let mode = settings.get("mode").unwrap();
        assert_eq!(mode.value, "hybrid_preferred");

        let budget = settings.get("semantic_budget_mb").unwrap();
        assert_eq!(budget.value, "500");

        // Verify all policy fields are represented, including those
        // without env vars.
        assert!(settings.get("fast_tier_embedder").is_some());
        assert!(settings.get("reranker").is_some());
        assert_eq!(settings.get("reranker").unwrap().value, "ms-marco-minilm");
    }

    #[test]
    fn effective_settings_cli_overrides_show_cli_source() {
        let cli = CliSemanticOverrides {
            mode: Some(SemanticMode::LexicalOnly),
            semantic_budget_mb: Some(100),
            ..Default::default()
        };
        let settings = EffectiveSettings::resolve(&cli);

        let mode = settings.get("mode").unwrap();
        assert_eq!(mode.value, "lexical_only");
        assert_eq!(mode.source, SettingSource::Cli);

        let budget = settings.get("semantic_budget_mb").unwrap();
        assert_eq!(budget.value, "100");
        assert_eq!(budget.source, SettingSource::Cli);

        // Non-overridden fields remain default.
        let fast_dim = settings.get("fast_dimension").unwrap();
        assert_eq!(fast_dim.source, SettingSource::CompiledDefault);
    }

    #[test]
    fn effective_settings_lookup_by_name() {
        let cli = CliSemanticOverrides::default();
        let settings = EffectiveSettings::resolve(&cli);

        assert!(settings.get("mode").is_some());
        assert!(settings.get("semantic_schema_version").is_some());
        assert!(settings.get("nonexistent").is_none());
    }

    // Uses an injected env lookup so the test does not depend on the real
    // process environment.
    #[test]
    fn effective_settings_environment_overrides_show_environment_source() {
        let settings =
            EffectiveSettings::resolve_with_env_lookup(&CliSemanticOverrides::default(), |key| {
                match key {
                    "CASS_SEMANTIC_MODE" => Some("lexical_only".to_string()),
                    "CASS_SEMANTIC_BUDGET_MB" => Some("321".to_string()),
                    _ => None,
                }
            });

        let mode = settings.get("mode").unwrap();
        assert_eq!(mode.value, "lexical_only");
        assert_eq!(mode.source, SettingSource::Environment);

        let budget = settings.get("semantic_budget_mb").unwrap();
        assert_eq!(budget.value, "321");
        assert_eq!(budget.source, SettingSource::Environment);
    }

    #[test]
    fn effective_settings_download_policy_uses_snake_case_value() {
        let settings =
            EffectiveSettings::resolve_with_env_lookup(&CliSemanticOverrides::default(), |key| {
                match key {
                    "CASS_SEMANTIC_DOWNLOAD_POLICY" => Some("budget_gated".to_string()),
                    _ => None,
                }
            });

        let policy = settings.get("download_policy").unwrap();
        assert_eq!(policy.value, "budget_gated");
        assert_eq!(policy.source, SettingSource::Environment);
    }

    #[test]
    fn effective_settings_json_round_trip() {
        let cli = CliSemanticOverrides {
            mode: Some(SemanticMode::StrictSemantic),
            ..Default::default()
        };
        let settings = EffectiveSettings::resolve(&cli);
        let json = serde_json::to_string_pretty(&settings).unwrap();
        let deser: EffectiveSettings = serde_json::from_str(&json).unwrap();
        assert_eq!(deser.settings.len(), settings.settings.len());
        assert_eq!(deser.get("mode").unwrap().value, "strict_semantic");
    }

    #[test]
    fn effective_settings_source_counts() {
        let cli = CliSemanticOverrides {
            mode: Some(SemanticMode::LexicalOnly),
            semantic_budget_mb: Some(200),
            ..Default::default()
        };
        let settings = EffectiveSettings::resolve(&cli);
        let counts = settings.source_counts();

        assert_eq!(*counts.get(&SettingSource::Cli).unwrap_or(&0), 2);
        // Everything else is compiled default.
        assert!(*counts.get(&SettingSource::CompiledDefault).unwrap_or(&0) > 10);
    }

    #[test]
    fn effective_settings_version_fields_always_compiled() {
        let cli = CliSemanticOverrides::default();
        let settings = EffectiveSettings::resolve(&cli);

        let schema = settings.get("semantic_schema_version").unwrap();
        assert_eq!(schema.source, SettingSource::CompiledDefault);
        assert!(schema.env_var.is_none()); // not overridable

        let chunking = settings.get("chunking_strategy_version").unwrap();
        assert_eq!(chunking.source, SettingSource::CompiledDefault);
        assert!(chunking.env_var.is_none());
    }
}
coding_agent_search/search/policy.rs

coding_agent_search/search/
policy.rs