Skip to main content

innate_core/kb/
mod.rs

1//! KnowledgeBase — all 8 Public APIs.
2
3/// Return type for pack(): (selected_chunks, skipped_groups, skip_reasons)
4type PackResult = (
5    Vec<Value>,
6    Vec<(Vec<Value>, f64, usize)>,
7    std::collections::HashMap<String, String>,
8);
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::Arc;
13
14use serde_json::{json, Value};
15
16use crate::embedding::{DummyEmbeddingProvider, EmbeddingProvider};
17use crate::errors::{InnateError, Result};
18use crate::refine::{
19    DefaultSanitizer, DistilledChunk, Distiller, HeuristicDistiller, NullRefiner, Refiner,
20    Sanitizer,
21};
22use crate::storage::{ChunkRow, EpisodicLogRow, Storage};
23use crate::utils::{
24    content_hash, estimate_tokens, gen_uuid, pack_embedding, utc_now_iso, SanitizeAction,
25};
26
27mod appraise;
28mod curate;
29mod evolve;
30mod inspection;
31mod lifecycle;
32mod recall;
33mod record;
34mod situation;
35
36pub use appraise::{
37    AppraiseParams, Contributor, FlaggedPoint, Tier, Valence, Verdict,
38};
39pub use recall::RecallParams;
40pub use record::RecordParams;
41pub use situation::Situation;
42
43// ---------------------------------------------------------------------------
44// Tuning defaults
45// ---------------------------------------------------------------------------
46
// --- recall.* ---------------------------------------------------------------
// Weights of the fused recall score. They add up to 1.05 rather than 1.0 on
// purpose: the score only ranks candidates relative to each other, it is not a
// calibrated probability and is never re-normalised, so content similarity
// deliberately carries 0.05 of extra headroom. Do not "correct" the sum.
const W_CONTENT: f64 = 0.55;
const W_TRIGGER: f64 = 0.25;
const W_CONTEXT: f64 = 0.15;
const W_CONFIDENCE: f64 = 0.10;
const TOP_K_CANDIDATES: usize = 20;
const ANTI_TRIGGER_PENALTY: f64 = 0.6;
const PENDING_RECALL_PENALTY: f64 = 0.60;
const DENSITY_REFILL: bool = true;

// --- curate.* ---------------------------------------------------------------
const LOW_CONF_THRESHOLD: f64 = 0.25;
const LOW_CONF_IDLE_DAYS: i64 = 60;
const REPEAT_SELECT_MIN: i64 = 10;
const REPEAT_SELECT_CONF_MAX: f64 = 0.5;
const NEVER_USED_AGE_DAYS: i64 = 30;
const OPEN_TTL_DAYS: i64 = 14;
const SCREENING_TIMEOUT_MINUTES: i64 = 30;
const PROMOTE_USED_SUCCESS_MIN: i64 = 3;
const PROMOTE_CONFIDENCE_MIN: f64 = 0.60;
const DECAY_FLOOR: f64 = 0.20;
const GOVERNANCE_ARCHIVE_THRESHOLD: i64 = 3;
const NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD: i64 = 5;
const FAILURE_MIN_USES: i64 = 5;
const FAILURE_MAX_SUCCESS_RATE: f64 = 0.20;
const FAILURE_CONFIDENCE_MAX: f64 = 0.35;
const LOG_COMPACT_DAYS: i64 = 30;

// --- evolve.* ---------------------------------------------------------------
const EVOLVE_THRESHOLD: i64 = 5;
const DISTILL_BATCH_SIZE: usize = 20;
const GOVERNANCE_EVOLVE_THRESHOLD: i64 = 3;

// --- appraise.* / situation.* (Spec §8) --------------------------------------
// Defaults for the intuition / appraise critic. Appraise reuses the same fused
// score as recall; these values only decide how that score is tiered/flagged.
const APPRAISE_TIER_WEAK: f64 = 0.30;
const APPRAISE_TIER_STRONG: f64 = 0.65;
const APPRAISE_MIN_STRENGTH: f64 = 0.40;
const APPRAISE_TOP: usize = 8;
const APPRAISE_TRIGGER_HIT_MIN: f64 = 0.50;
const APPRAISE_CANDIDATE_IN_EMBED: bool = true;
const SITUATION_COARSE_KEYS: &str = "stage,error_class,file_type";
90// ---------------------------------------------------------------------------
91// Public result types
92// ---------------------------------------------------------------------------
93
94#[derive(Debug, Default, Clone)]
95pub struct RecallResult {
96    pub knowledge: Vec<Value>,
97    pub sparks: Vec<Value>,
98    pub trace_id: String,
99    pub empty: bool,
100    pub depth_skipped: Vec<String>,
101    pub skipped_reasons: HashMap<String, String>,
102}
103
104#[derive(Debug, Default)]
105pub struct CurateReport {
106    pub archived: Vec<String>,
107    pub deduped: Vec<String>,
108    pub decayed: Vec<String>,
109    pub cycles: Vec<Vec<String>>,
110    pub orphans: Vec<String>,
111    pub recovered: Vec<String>,
112    pub warnings: Vec<String>,
113    pub stats: HashMap<String, Value>,
114}
115
/// Private tally for one distillation batch: how many items were distilled and
/// how many failed. Both counters start at zero via `Default`.
#[derive(Default, Debug)]
struct DistillBatchReport {
    distilled: usize,
    failed: usize,
}
121
/// Limits a single Curate run to a subset of chunks.
///
/// The default scope applies no filter and is not a dry run.
#[derive(Clone, Debug, Default)]
pub struct CurateScope {
    /// When set, only chunks with this origin (e.g. "distilled") are processed.
    pub origin: Option<String>,
    /// When set, only chunks that belong to this skill are processed.
    pub skill_name: Option<String>,
    /// When true, the report is computed but no changes are written.
    pub dry_run: bool,
}
132
133/// Replaceable governance interface (§二·六). Inject via `KnowledgeBase::open_with`.
134/// Default implementation: `BuiltinCurator`.
135pub trait Curator: Send + Sync {
136    fn run(&self, kb: &KnowledgeBase, scope: &CurateScope) -> Result<CurateReport>;
137}
138
139/// Built-in curator — implements the full §四 governance pipeline.
140pub struct BuiltinCurator;
141
142impl Curator for BuiltinCurator {
143    fn run(&self, kb: &KnowledgeBase, scope: &CurateScope) -> Result<CurateReport> {
144        kb.builtin_curate_impl(scope)
145    }
146}
147
148// ---------------------------------------------------------------------------
149// KnowledgeBase
150// ---------------------------------------------------------------------------
151
152pub struct KnowledgeBase {
153    pub storage: Storage,
154    embedding: Arc<dyn EmbeddingProvider>,
155    refiner: Arc<dyn Refiner>,
156    distiller: Arc<dyn Distiller>,
157    curator: Arc<dyn Curator>,
158    sanitizer: Arc<dyn Sanitizer>,
159
160    // Tuning params (loaded from meta at init)
161    w_content: f64,
162    w_trigger: f64,
163    w_confidence: f64,
164    w_context: f64,
165    top_k_candidates: usize,
166    anti_trigger_penalty: f64,
167    density_refill: bool,
168
169    low_conf_threshold: f64,
170    low_conf_idle_days: i64,
171    repeat_select_min: i64,
172    repeat_select_conf_max: f64,
173    never_used_age_days: i64,
174    open_ttl_days: i64,
175    screening_timeout_minutes: i64,
176    promote_used_success_min: i64,
177    promote_confidence_min: f64,
178    decay_floor: f64,
179    evolve_threshold: i64,
180    distill_batch_size: usize,
181    evolve_schedule_interval_hours: i64,
182    governance_archive_threshold: i64,
183    negative_feedback_archive_threshold: i64,
184    governance_evolve_threshold: i64,
185    governance_proposal_max_age_days: i64,
186    failure_min_uses: i64,
187    failure_max_success_rate: f64,
188    failure_confidence_max: f64,
189    log_compact_days: i64,
190
191    // Intuition / appraise critic params
192    appraise_tier_weak: f64,
193    appraise_tier_strong: f64,
194    appraise_min_strength: f64,
195    appraise_top: usize,
196    appraise_trigger_hit_min: f64,
197    appraise_candidate_in_embed: bool,
198    situation_coarse_keys: String,
199}
200
201impl KnowledgeBase {
202    pub fn open(db_path: impl AsRef<Path>) -> Result<Self> {
203        Self::open_with(db_path, None, None, None, None, None)
204    }
205
206    /// Persist a content embedding, rejecting any vector whose dimension differs
207    /// from the configured provider. A mismatched vector is silently skipped at
208    /// search time (cosine search only scores equal-dimension vectors), so an
209    /// unchecked write becomes invisible recall loss with no error. Fail closed
210    /// at the write boundary instead. All vector writers route through here.
211    pub(crate) fn store_vec_content(&self, chunk_id: &str, cvec: &[f32]) -> Result<()> {
212        let want = self.embedding.content_dim();
213        if cvec.len() != want {
214            return Err(InnateError::InvalidState(format!(
215                "content embedding dim {} != configured {want} (chunk {chunk_id})",
216                cvec.len()
217            )));
218        }
219        self.storage
220            .insert_vec_content(chunk_id, &pack_embedding(cvec))
221    }
222
223    /// Trigger-vector counterpart of [`store_vec_content`]; same fail-closed
224    /// dimension guard against `trigger_dim()`.
225    pub(crate) fn store_vec_trigger(&self, chunk_id: &str, tvec: &[f32]) -> Result<()> {
226        let want = self.embedding.trigger_dim();
227        if tvec.len() != want {
228            return Err(InnateError::InvalidState(format!(
229                "trigger embedding dim {} != configured {want} (chunk {chunk_id})",
230                tvec.len()
231            )));
232        }
233        self.storage
234            .insert_vec_trigger(chunk_id, &pack_embedding(tvec))
235    }
236
237    pub fn open_with(
238        db_path: impl AsRef<Path>,
239        embedding: Option<Arc<dyn EmbeddingProvider>>,
240        refiner: Option<Arc<dyn Refiner>>,
241        distiller: Option<Arc<dyn Distiller>>,
242        curator: Option<Arc<dyn Curator>>,
243        sanitizer: Option<Arc<dyn Sanitizer>>,
244    ) -> Result<Self> {
245        let embedding = embedding.unwrap_or_else(|| Arc::new(DummyEmbeddingProvider::default()));
246        let refiner = refiner.unwrap_or_else(|| Arc::new(NullRefiner));
247        let distiller = distiller.unwrap_or_else(|| Arc::new(HeuristicDistiller));
248        let curator = curator.unwrap_or_else(|| Arc::new(BuiltinCurator));
249        let sanitizer = sanitizer.unwrap_or_else(|| Arc::new(DefaultSanitizer));
250
251        let storage = Storage::open(db_path, embedding.content_dim(), embedding.trigger_dim())?;
252
253        let mut kb = Self {
254            storage,
255            embedding,
256            refiner,
257            distiller,
258            curator,
259            sanitizer,
260            w_content: W_CONTENT,
261            w_trigger: W_TRIGGER,
262            w_confidence: W_CONFIDENCE,
263            w_context: W_CONTEXT,
264            top_k_candidates: TOP_K_CANDIDATES,
265            anti_trigger_penalty: ANTI_TRIGGER_PENALTY,
266            density_refill: DENSITY_REFILL,
267            low_conf_threshold: LOW_CONF_THRESHOLD,
268            low_conf_idle_days: LOW_CONF_IDLE_DAYS,
269            repeat_select_min: REPEAT_SELECT_MIN,
270            repeat_select_conf_max: REPEAT_SELECT_CONF_MAX,
271            never_used_age_days: NEVER_USED_AGE_DAYS,
272            open_ttl_days: OPEN_TTL_DAYS,
273            screening_timeout_minutes: SCREENING_TIMEOUT_MINUTES,
274            promote_used_success_min: PROMOTE_USED_SUCCESS_MIN,
275            promote_confidence_min: PROMOTE_CONFIDENCE_MIN,
276            decay_floor: DECAY_FLOOR,
277            evolve_threshold: EVOLVE_THRESHOLD,
278            distill_batch_size: DISTILL_BATCH_SIZE,
279            evolve_schedule_interval_hours: 6,
280            governance_archive_threshold: GOVERNANCE_ARCHIVE_THRESHOLD,
281            negative_feedback_archive_threshold: NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD,
282            governance_evolve_threshold: GOVERNANCE_EVOLVE_THRESHOLD,
283            governance_proposal_max_age_days: 30,
284            failure_min_uses: FAILURE_MIN_USES,
285            failure_max_success_rate: FAILURE_MAX_SUCCESS_RATE,
286            failure_confidence_max: FAILURE_CONFIDENCE_MAX,
287            log_compact_days: LOG_COMPACT_DAYS,
288            appraise_tier_weak: APPRAISE_TIER_WEAK,
289            appraise_tier_strong: APPRAISE_TIER_STRONG,
290            appraise_min_strength: APPRAISE_MIN_STRENGTH,
291            appraise_top: APPRAISE_TOP,
292            appraise_trigger_hit_min: APPRAISE_TRIGGER_HIT_MIN,
293            appraise_candidate_in_embed: APPRAISE_CANDIDATE_IN_EMBED,
294            situation_coarse_keys: SITUATION_COARSE_KEYS.to_string(),
295        };
296        kb.init_meta()?;
297        kb.load_params()?;
298        Ok(kb)
299    }
300
301    fn init_meta(&self) -> Result<()> {
302        let lib_id = gen_uuid();
303        let content_dim = self.embedding.content_dim().to_string();
304        let trigger_dim = self.embedding.trigger_dim().to_string();
305        let embed_model = self.embedding.model_name();
306
307        for (key, expected) in [
308            ("content_dim", self.embedding.content_dim()),
309            ("trigger_dim", self.embedding.trigger_dim()),
310        ] {
311            if let Some(stored) = self.storage.get_meta(key)? {
312                let actual = stored.parse::<usize>().map_err(|_| {
313                    InnateError::Other(format!("invalid {key} metadata value: {stored}"))
314                })?;
315                if actual != expected {
316                    return Err(InnateError::Other(format!(
317                        "{key} mismatch: database uses {actual}, embedding provider uses {expected}"
318                    )));
319                }
320            }
321        }
322
323        let defaults: &[(&str, &str)] = &[
324            ("lib_id", &lib_id),
325            ("lib_role", "personal"),
326            ("schema_version", "4.14"),
327            ("content_dim", &content_dim),
328            ("trigger_dim", &trigger_dim),
329            ("embed_model", embed_model),
330            ("embed_version", "1"),
331            ("vector_revision", "0"),
332            ("last_agg_ts", "1970-01-01T00:00:00.000Z"),
333            ("recall.w_content", "0.55"),
334            ("recall.w_trigger", "0.25"),
335            ("recall.w_confidence", "0.10"),
336            ("recall.w_context", "0.15"),
337            ("recall.top_k_candidates", "20"),
338            ("recall.anti_trigger_penalty", "0.6"),
339            ("recall.density_refill", "true"),
340            ("curate.low_conf_threshold", "0.25"),
341            ("curate.low_conf_idle_days", "60"),
342            ("curate.repeat_select_min", "10"),
343            ("curate.repeat_select_conf_max", "0.5"),
344            ("curate.never_used_age_days", "30"),
345            ("curate.open_ttl_days", "14"),
346            ("curate.screening_timeout_minutes", "30"),
347            ("curate.promote_used_success_min", "3"),
348            ("curate.promote_confidence_min", "0.60"),
349            ("curate.decay_floor", "0.20"),
350            ("evolve.threshold_new_count", "5"),
351            ("evolve.distill_batch_size", "20"),
352            ("evolve.schedule_interval_hours", "6"),
353            ("curate.soft_mature_threshold", "5"),
354            ("evolve.distill_token_window_hours", "24"),
355            ("curate.governance_archive_threshold", "3"),
356            ("curate.negative_feedback_archive_threshold", "5"),
357            ("evolve.governance_pending_threshold", "3"),
358            ("curate.governance_proposal_max_age_days", "30"),
359            ("curate.failure_min_uses", "5"),
360            ("curate.failure_max_success_rate", "0.20"),
361            ("curate.failure_confidence_max", "0.35"),
362            ("curate.log_compact_days", "30"),
363            ("appraise.tier_weak", "0.30"),
364            ("appraise.tier_strong", "0.65"),
365            ("appraise.min_strength", "0.40"),
366            ("appraise.top", "8"),
367            ("appraise.trigger_hit_min", "0.50"),
368            ("appraise.candidate_in_embed", "true"),
369            ("situation.coarse_keys", "stage,error_class,file_type"),
370        ];
371        self.storage.begin_immediate()?;
372        let result = (|| -> Result<()> {
373            for (k, v) in defaults {
374                if self.storage.get_meta(k)?.is_none() {
375                    self.storage.set_meta(k, v)?;
376                }
377            }
378            self.storage.commit()
379        })();
380        if result.is_err() {
381            let _ = self.storage.rollback();
382        }
383        result
384    }
385
386    fn load_params(&mut self) -> Result<()> {
387        let f = |k: &str, d: f64| -> f64 {
388            self.storage
389                .get_meta(k)
390                .ok()
391                .flatten()
392                .and_then(|v| v.parse().ok())
393                .unwrap_or(d)
394        };
395        let i = |k: &str, d: i64| -> i64 {
396            self.storage
397                .get_meta(k)
398                .ok()
399                .flatten()
400                .and_then(|v| v.parse().ok())
401                .unwrap_or(d)
402        };
403        let b = |k: &str, d: bool| -> bool {
404            self.storage
405                .get_meta(k)
406                .ok()
407                .flatten()
408                .map(|v| v.to_lowercase() == "true")
409                .unwrap_or(d)
410        };
411        self.w_content = f("recall.w_content", W_CONTENT);
412        self.w_trigger = f("recall.w_trigger", W_TRIGGER);
413        self.w_confidence = f("recall.w_confidence", W_CONFIDENCE);
414        self.w_context = f("recall.w_context", W_CONTEXT);
415        self.top_k_candidates =
416            i("recall.top_k_candidates", TOP_K_CANDIDATES as i64).max(1) as usize;
417        self.anti_trigger_penalty = f("recall.anti_trigger_penalty", ANTI_TRIGGER_PENALTY);
418        self.density_refill = b("recall.density_refill", DENSITY_REFILL);
419        self.low_conf_threshold = f("curate.low_conf_threshold", LOW_CONF_THRESHOLD);
420        self.low_conf_idle_days = i("curate.low_conf_idle_days", LOW_CONF_IDLE_DAYS);
421        self.repeat_select_min = i("curate.repeat_select_min", REPEAT_SELECT_MIN);
422        self.repeat_select_conf_max = f("curate.repeat_select_conf_max", REPEAT_SELECT_CONF_MAX);
423        self.never_used_age_days = i("curate.never_used_age_days", NEVER_USED_AGE_DAYS);
424        self.open_ttl_days = i("curate.open_ttl_days", OPEN_TTL_DAYS);
425        self.screening_timeout_minutes = i(
426            "curate.screening_timeout_minutes",
427            SCREENING_TIMEOUT_MINUTES,
428        );
429        self.promote_used_success_min =
430            i("curate.promote_used_success_min", PROMOTE_USED_SUCCESS_MIN);
431        self.promote_confidence_min = f("curate.promote_confidence_min", PROMOTE_CONFIDENCE_MIN);
432        self.decay_floor = f("curate.decay_floor", DECAY_FLOOR).clamp(0.0, 0.4);
433        self.evolve_threshold = i("evolve.threshold_new_count", EVOLVE_THRESHOLD);
434        self.distill_batch_size =
435            i("evolve.distill_batch_size", DISTILL_BATCH_SIZE as i64) as usize;
436        self.evolve_schedule_interval_hours = i("evolve.schedule_interval_hours", 6).max(1);
437        self.governance_archive_threshold = i(
438            "curate.governance_archive_threshold",
439            GOVERNANCE_ARCHIVE_THRESHOLD,
440        )
441        .max(1);
442        self.negative_feedback_archive_threshold = i(
443            "curate.negative_feedback_archive_threshold",
444            NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD,
445        )
446        .max(1);
447        self.governance_evolve_threshold = i(
448            "evolve.governance_pending_threshold",
449            GOVERNANCE_EVOLVE_THRESHOLD,
450        )
451        .max(1);
452        self.governance_proposal_max_age_days =
453            i("curate.governance_proposal_max_age_days", 30).max(1);
454        self.failure_min_uses = i("curate.failure_min_uses", FAILURE_MIN_USES).max(1);
455        self.failure_max_success_rate =
456            f("curate.failure_max_success_rate", FAILURE_MAX_SUCCESS_RATE).clamp(0.0, 1.0);
457        self.failure_confidence_max =
458            f("curate.failure_confidence_max", FAILURE_CONFIDENCE_MAX).clamp(0.0, 1.0);
459        self.log_compact_days = i("curate.log_compact_days", LOG_COMPACT_DAYS).max(1);
460        let s = |k: &str, d: &str| -> String {
461            self.storage
462                .get_meta(k)
463                .ok()
464                .flatten()
465                .filter(|v| !v.trim().is_empty())
466                .unwrap_or_else(|| d.to_string())
467        };
468        self.appraise_tier_weak = f("appraise.tier_weak", APPRAISE_TIER_WEAK).clamp(0.0, 1.0);
469        self.appraise_tier_strong =
470            f("appraise.tier_strong", APPRAISE_TIER_STRONG).clamp(0.0, 1.0);
471        self.appraise_min_strength =
472            f("appraise.min_strength", APPRAISE_MIN_STRENGTH).clamp(0.0, 1.0);
473        self.appraise_top = i("appraise.top", APPRAISE_TOP as i64).max(1) as usize;
474        self.appraise_trigger_hit_min =
475            f("appraise.trigger_hit_min", APPRAISE_TRIGGER_HIT_MIN).clamp(0.0, 1.0);
476        self.appraise_candidate_in_embed =
477            b("appraise.candidate_in_embed", APPRAISE_CANDIDATE_IN_EMBED);
478        self.situation_coarse_keys = s("situation.coarse_keys", SITUATION_COARSE_KEYS);
479        Ok(())
480    }
481}
482
483// ---------------------------------------------------------------------------
484// Helpers
485// ---------------------------------------------------------------------------
486
487struct CandidateInfo {
488    chunk: Value,
489    sim_content: f32,
490    sim_trigger: f32,
491}
492
493fn chunk_is_valid_for_recall(chunk: &Value, embed_version: i64) -> bool {
494    chunk.get("state").and_then(Value::as_str) != Some("archived")
495        && chunk.get("origin").and_then(Value::as_str) != Some("spark")
496        && chunk
497            .get("embed_version")
498            .and_then(Value::as_i64)
499            .unwrap_or(1)
500            >= embed_version
501}
502
/// Normalize a query string before it is hashed into a context_key.
///
/// The goal is that queries made of the same significant words end up in the
/// same context_stat bucket regardless of surface form. Steps, in order:
/// 1. lowercase the text;
/// 2. replace every character that is neither alphanumeric nor whitespace
///    with a space;
/// 3. split on whitespace and drop the short stop-word list below;
/// 4. sort the remaining tokens and remove duplicates;
/// 5. join them with single spaces.
///
/// Word order, repetition, punctuation, case and the listed stop words
/// therefore do not affect the result. No stemming is performed. A query made
/// up only of stop words or punctuation normalizes to the empty string.
fn normalize_query(query: &str) -> String {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "for", "in", "of", "on", "the", "to", "with",
    ];

    let lowered = query.to_lowercase();
    let spaced: String = lowered
        .chars()
        .map(|c| match c {
            c if c.is_alphanumeric() || c.is_whitespace() => c,
            _ => ' ',
        })
        .collect();

    // An ordered set yields the tokens sorted and de-duplicated in one step.
    let unique: std::collections::BTreeSet<&str> = spaced
        .split_whitespace()
        .filter(|word| !STOP_WORDS.contains(word))
        .collect();

    unique.into_iter().collect::<Vec<&str>>().join(" ")
}
534
535fn estimate_distill_prompt_tokens(log: &Value, related_logs: &[Value]) -> i64 {
536    let primary: i64 = [
537        "query",
538        "recall_snapshot",
539        "output",
540        "output_summary",
541        "nomination",
542    ]
543    .iter()
544    .filter_map(|key| log.get(*key).and_then(Value::as_str))
545    .map(|text| estimate_tokens(text) as i64)
546    .sum();
547    let log_id = log.get("id").and_then(Value::as_str).unwrap_or("");
548    let context_key = log.get("context_key").and_then(Value::as_str);
549    let related: i64 = related_logs
550        .iter()
551        .filter(|other| other.get("id").and_then(Value::as_str).unwrap_or("") != log_id)
552        .filter(|other| {
553            context_key.is_some() && other.get("context_key").and_then(Value::as_str) == context_key
554        })
555        .take(4)
556        .flat_map(|other| {
557            ["query", "output_summary", "outcome"]
558                .into_iter()
559                .filter_map(|key| other.get(key).and_then(Value::as_str))
560        })
561        .map(|text| estimate_tokens(text) as i64)
562        .sum();
563    primary + related
564}
565
566fn estimate_distilled_chunk_tokens(chunk: &DistilledChunk) -> i64 {
567    estimate_tokens(&chunk.content) as i64
568        + chunk
569            .trigger_desc
570            .as_deref()
571            .map(estimate_tokens)
572            .unwrap_or(0) as i64
573        + chunk
574            .anti_trigger_desc
575            .as_deref()
576            .map(estimate_tokens)
577            .unwrap_or(0) as i64
578}
579
/// Report whether `query` contains any phrase of the comma-separated `anti`
/// list. Matching is a case-insensitive substring test; each phrase is trimmed
/// and empty phrases are ignored.
fn anti_trigger_hit(query: &str, anti: &str) -> bool {
    let haystack = query.to_lowercase();
    let phrases = anti.to_lowercase();
    for phrase in phrases.split(',') {
        let phrase = phrase.trim();
        if phrase.is_empty() {
            continue;
        }
        if haystack.contains(phrase) {
            return true;
        }
    }
    false
}
587
588fn block_cost(block: &[Value]) -> usize {
589    block
590        .iter()
591        .map(|b| {
592            b.get("token_count")
593                .and_then(Value::as_u64)
594                .map(|t| t as usize)
595                .unwrap_or_else(|| {
596                    estimate_tokens(b.get("content").and_then(Value::as_str).unwrap_or("")).max(100)
597                })
598        })
599        .sum()
600}
601
602fn limit_knowledge(knowledge: Vec<Value>, top: Option<usize>) -> Vec<Value> {
603    match top {
604        None => knowledge,
605        Some(0) => vec![],
606        Some(n) => knowledge.into_iter().take(n).collect(),
607    }
608}
609
/// Classify a usage report: "unknown" when no list was given, "known_none"
/// for an empty list and "known_some" for a non-empty one.
fn usage_state(used: Option<&[String]>) -> &'static str {
    match used.map(|ids| ids.is_empty()) {
        Some(true) => "known_none",
        Some(false) => "known_some",
        None => "unknown",
    }
}
617
/// `numerator / denominator` rounded to three decimal places; 0.0 whenever the
/// denominator is zero or negative.
fn ratio(numerator: i64, denominator: i64) -> f64 {
    if denominator > 0 {
        let per_mille = numerator as f64 / denominator as f64 * 1000.0;
        per_mille.round() / 1000.0
    } else {
        0.0
    }
}
625
626fn validate_source(source: &str) -> Result<()> {
627    if !matches!(
628        source,
629        "mcp" | "sdk" | "cli" | "hook" | "daemon" | "augmented"
630    ) {
631        return Err(InnateError::InvalidState(format!(
632            "invalid event source: {source}"
633        )));
634    }
635    Ok(())
636}
637
638fn count_query(storage: &Storage, sql: &str) -> Result<i64> {
639    Ok(storage
640        .query_chunks(sql)?
641        .first()
642        .and_then(|r| r.as_object())
643        .and_then(|m| m.values().next())
644        .and_then(Value::as_i64)
645        .unwrap_or(0))
646}
647
648fn count_query_params<P: rusqlite::Params>(storage: &Storage, sql: &str, p: P) -> Result<i64> {
649    Ok(storage
650        .query_chunks_params(sql, p)?
651        .first()
652        .and_then(|r| r.as_object())
653        .and_then(|m| m.values().next())
654        .and_then(Value::as_i64)
655        .unwrap_or(0))
656}
657
658fn days_ago(now_iso: &str, days: i64) -> String {
659    use chrono::{DateTime, Duration, Utc};
660    if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
661        let cutoff = t - Duration::days(days);
662        return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
663    }
664    now_iso.to_string()
665}
666
667fn minutes_ago(now_iso: &str, minutes: i64) -> String {
668    use chrono::{DateTime, Duration, Utc};
669    if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
670        let cutoff = t - Duration::minutes(minutes);
671        return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
672    }
673    now_iso.to_string()
674}
675
676fn hours_ago(now_iso: &str, hours: i64) -> String {
677    use chrono::{DateTime, Duration, Utc};
678    if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
679        let cutoff = t - Duration::hours(hours);
680        return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
681    }
682    now_iso.to_string()
683}
684
685fn minutes_after(now_iso: &str, minutes: i64) -> String {
686    use chrono::{DateTime, Duration, Utc};
687    if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
688        let cutoff = t + Duration::minutes(minutes);
689        return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
690    }
691    now_iso.to_string()
692}
693
694fn hours_after(now_iso: &str, hours: i64) -> String {
695    use chrono::{DateTime, Duration, Utc};
696    if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
697        let cutoff = t + Duration::hours(hours);
698        return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
699    }
700    now_iso.to_string()
701}
702
703/// Return the number of whole days between two ISO timestamps (now - past; clamped ≥ 0).
704fn iso_days_diff(now_iso: &str, past_iso: &str) -> i64 {
705    use chrono::{DateTime, Utc};
706    let parse = |s: &str| s.parse::<DateTime<Utc>>().ok();
707    if let (Some(a), Some(b)) = (parse(now_iso), parse(past_iso)) {
708        let diff = a - b;
709        diff.num_days().max(0)
710    } else {
711        0
712    }
713}
714
715/// DFS-based cycle detection on the hard-dep graph. Returns list of cycles (each is a Vec of ids).
716fn detect_cycles(deps: &[Value]) -> Vec<Vec<String>> {
717    use std::collections::HashMap;
718    let mut adj: HashMap<String, Vec<String>> = HashMap::new();
719    for d in deps {
720        let src = d
721            .get("src")
722            .and_then(Value::as_str)
723            .unwrap_or("")
724            .to_string();
725        let dst = d
726            .get("dst")
727            .and_then(Value::as_str)
728            .unwrap_or("")
729            .to_string();
730        if !src.is_empty() && !dst.is_empty() {
731            adj.entry(src).or_default().push(dst);
732        }
733    }
734    let nodes: Vec<String> = adj.keys().cloned().collect();
735    let mut visited: HashSet<String> = HashSet::new();
736    let mut on_stack: HashSet<String> = HashSet::new();
737    let mut cycles: Vec<Vec<String>> = vec![];
738
739    fn dfs(
740        node: &str,
741        adj: &HashMap<String, Vec<String>>,
742        visited: &mut HashSet<String>,
743        on_stack: &mut HashSet<String>,
744        path: &mut Vec<String>,
745        cycles: &mut Vec<Vec<String>>,
746    ) {
747        if on_stack.contains(node) {
748            // Found cycle — extract loop segment.
749            let start = path.iter().position(|n| n == node).unwrap_or(0);
750            cycles.push(path[start..].to_vec());
751            return;
752        }
753        if visited.contains(node) {
754            return;
755        }
756        visited.insert(node.to_string());
757        on_stack.insert(node.to_string());
758        path.push(node.to_string());
759        if let Some(children) = adj.get(node) {
760            for child in children {
761                dfs(child, adj, visited, on_stack, path, cycles);
762            }
763        }
764        path.pop();
765        on_stack.remove(node);
766    }
767
768    for node in nodes {
769        let mut path = vec![];
770        dfs(
771            &node,
772            &adj,
773            &mut visited,
774            &mut on_stack,
775            &mut path,
776            &mut cycles,
777        );
778    }
779    cycles
780}