/// Tuple alias bundling three values that are produced together.
///
/// NOTE(review): neither the producer nor the consumer of this alias is
/// visible in this part of the file, so the element notes below are inferred
/// from the types only — confirm against the function that returns it.
type PackResult = (
    // A flat list of JSON values (presumably the selected items — TODO confirm).
    Vec<Value>,
    // Groups of JSON values, each paired with an `f64` and a `usize`
    // (presumably a score and a cost or index — TODO confirm).
    Vec<(Vec<Value>, f64, usize)>,
    // String-to-string map, written with its full path.
    std::collections::HashMap<String, String>,
);
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::Arc;
13
14use serde_json::{json, Value};
15
16use crate::embedding::{DummyEmbeddingProvider, EmbeddingProvider};
17use crate::errors::{InnateError, Result};
18use crate::refine::{
19 DefaultSanitizer, DistilledChunk, Distiller, HeuristicDistiller, NoopReranker, NullRefiner,
20 Refiner, Reranker, Sanitizer,
21};
22use crate::storage::{ChunkRow, EpisodicLogRow, Storage};
23use crate::utils::{
24 content_hash, estimate_tokens, gen_uuid, pack_embedding, utc_now_iso, SanitizeAction,
25};
26
27mod appraise;
28mod curate;
29mod evolve;
30mod inspection;
31mod lifecycle;
32mod recall;
33mod record;
34mod repair;
35mod situation;
36
37pub use appraise::{
38 AbstainReason, AppraiseParams, Contributor, FlaggedPoint, Tier, Valence, Verdict,
39 APPRAISE_ADVISORY,
40};
41pub use recall::RecallParams;
42pub use record::RecordParams;
43pub use repair::TraceRepairReport;
44pub use situation::Situation;
45
// Compiled-in defaults for the `recall.*` tunables. `open_with` seeds the
// `KnowledgeBase` fields from these and `load_params` falls back to them when
// the matching meta key cannot be read or parsed.

/// Default for the `recall.w_content` meta key.
const W_CONTENT: f64 = 0.55;
/// Default for the `recall.w_trigger` meta key.
const W_TRIGGER: f64 = 0.25;
/// Default for the `recall.w_confidence` meta key.
const W_CONFIDENCE: f64 = 0.10;
/// Default for the `recall.w_context` meta key.
const W_CONTEXT: f64 = 0.15;
/// Default for the `recall.w_activation` meta key.
const W_ACTIVATION: f64 = 0.08;
/// Default for the `recall.w_lexical` meta key.
const W_LEXICAL: f64 = 0.25;
/// Default for the `recall.top_k_candidates` meta key.
const TOP_K_CANDIDATES: usize = 20;
/// Default for the `recall.anti_trigger_penalty` meta key.
const ANTI_TRIGGER_PENALTY: f64 = 0.6;
/// Default for the `recall.density_refill` meta key.
const DENSITY_REFILL: bool = true;
65
// Compiled-in defaults for the `curate.*` and `evolve.*` tunables; used by
// `open_with` as initial values and by `load_params` as fallbacks.

/// Default for the `curate.low_conf_threshold` meta key.
const LOW_CONF_THRESHOLD: f64 = 0.25;
/// Default for the `curate.low_conf_idle_days` meta key.
const LOW_CONF_IDLE_DAYS: i64 = 60;
/// Default for the `curate.repeat_select_min` meta key.
const REPEAT_SELECT_MIN: i64 = 10;
/// Default for the `curate.repeat_select_conf_max` meta key.
const REPEAT_SELECT_CONF_MAX: f64 = 0.5;
/// Default for the `curate.never_used_age_days` meta key.
const NEVER_USED_AGE_DAYS: i64 = 30;
/// Default for the `curate.open_ttl_days` meta key.
const OPEN_TTL_DAYS: i64 = 14;
/// Default for the `curate.screening_timeout_minutes` meta key.
const SCREENING_TIMEOUT_MINUTES: i64 = 30;
/// Default for the `curate.promote_used_success_min` meta key.
const PROMOTE_USED_SUCCESS_MIN: i64 = 3;
/// Default for the `curate.promote_confidence_min` meta key.
const PROMOTE_CONFIDENCE_MIN: f64 = 0.60;
/// Default for the `curate.decay_floor` meta key (`load_params` clamps the
/// loaded value to `0.0..=0.4`).
const DECAY_FLOOR: f64 = 0.20;
/// Default for the `evolve.threshold_new_count` meta key.
const EVOLVE_THRESHOLD: i64 = 5;
/// Default for the `evolve.distill_batch_size` meta key.
const DISTILL_BATCH_SIZE: usize = 20;
// NOTE(review): not referenced in the visible part of this file and has no
// meta key here; presumably consumed by a submodule — confirm.
const PENDING_RECALL_PENALTY: f64 = 0.60;
79
// Compiled-in defaults for the `appraise.*`, `intuition.*`, `situation.*` and
// remaining `curate.*` / `evolve.*` tunables; used by `open_with` as initial
// values and by `load_params` as fallbacks.

/// Default for the `appraise.tier_weak` meta key.
const APPRAISE_TIER_WEAK: f64 = 0.30;
/// Default for the `appraise.tier_strong` meta key.
const APPRAISE_TIER_STRONG: f64 = 0.65;
/// Default for the `appraise.min_strength` meta key.
const APPRAISE_MIN_STRENGTH: f64 = 0.40;
/// Default for the `appraise.top` meta key.
const APPRAISE_TOP: usize = 8;
/// Default for the `appraise.trigger_hit_min` meta key.
const APPRAISE_TRIGGER_HIT_MIN: f64 = 0.50;
/// Default for the `appraise.candidate_in_embed` meta key.
const APPRAISE_CANDIDATE_IN_EMBED: bool = true;
/// Default for the `appraise.signature_floor` meta key.
const APPRAISE_SIGNATURE_FLOOR: f64 = 0.0;
/// Default for the `appraise.min_evidence` meta key.
const APPRAISE_MIN_EVIDENCE: i64 = 0;
/// Default for the `appraise.conflict_ceiling` meta key.
const APPRAISE_CONFLICT_CEILING: f64 = 1.0;
/// Default for the `intuition.prior_m` meta key.
const INTUITION_PRIOR_M: f64 = 2.0;
/// Default for the `intuition.base_rate` meta key.
const INTUITION_BASE_RATE: f64 = 0.5;
// NOTE(review): `RECALL_PRIOR_M` and `RECALL_BASE_RATE` are not referenced in
// the visible part of this file; presumably consumed by a submodule — confirm.
const RECALL_PRIOR_M: f64 = 2.0;
const RECALL_BASE_RATE: f64 = 0.5;
/// Default for the `intuition.calibration_bins` meta key (`load_params`
/// clamps the loaded value to `2..=100`).
const CALIBRATION_BINS: i64 = 10;
/// Default for the `situation.coarse_keys` meta key (comma-separated text).
const SITUATION_COARSE_KEYS: &str = "stage,error_class,file_type";
/// Default for the `recall.embed_situation_signature` meta key.
const EMBED_SITUATION_SIGNATURE: bool = false;
/// Default for the `curate.governance_archive_threshold` meta key.
const GOVERNANCE_ARCHIVE_THRESHOLD: i64 = 3;
/// Default for the `curate.negative_feedback_archive_threshold` meta key.
const NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD: i64 = 5;
/// Default for the `evolve.governance_pending_threshold` meta key.
const GOVERNANCE_EVOLVE_THRESHOLD: i64 = 3;
/// Default for the `curate.failure_min_uses` meta key.
const FAILURE_MIN_USES: i64 = 5;
/// Default for the `curate.failure_max_success_rate` meta key.
const FAILURE_MAX_SUCCESS_RATE: f64 = 0.20;
/// Default for the `curate.failure_confidence_max` meta key.
const FAILURE_CONFIDENCE_MAX: f64 = 0.35;
/// Default for the `curate.log_compact_days` meta key.
const LOG_COMPACT_DAYS: i64 = 30;
121
/// Result bundle of a recall operation.
///
/// NOTE(review): the code that fills this struct is not in the visible part
/// of the file; the field notes are inferred from names and types — confirm
/// against the `recall` submodule.
#[derive(Debug, Default, Clone)]
pub struct RecallResult {
    /// JSON values for the recalled knowledge items.
    pub knowledge: Vec<Value>,
    /// JSON values for "spark" items (presumably chunks whose `origin` is
    /// `"spark"` — TODO confirm).
    pub sparks: Vec<Value>,
    /// Identifier string for this recall (presumably links to a log row — TODO confirm).
    pub trace_id: String,
    /// Flag marking an empty result (exact criterion not visible here).
    pub empty: bool,
    /// Strings naming items skipped for depth reasons (presumably ids — TODO confirm).
    pub depth_skipped: Vec<String>,
    /// String-to-string map of skip reasons (presumably id -> reason — TODO confirm).
    pub skipped_reasons: HashMap<String, String>,
}
135
/// Outcome of a curation run, as returned by [`Curator::run`].
///
/// NOTE(review): the code that fills this struct is not in the visible part
/// of the file; the string lists presumably hold chunk ids — confirm against
/// the `curate` submodule.
#[derive(Debug, Default)]
pub struct CurateReport {
    /// Entries reported as archived.
    pub archived: Vec<String>,
    /// Entries reported as de-duplicated.
    pub deduped: Vec<String>,
    /// Entries reported as decayed.
    pub decayed: Vec<String>,
    /// Cycles, each a list of strings (same shape as `detect_cycles` returns).
    pub cycles: Vec<Vec<String>>,
    /// Entries reported as orphans.
    pub orphans: Vec<String>,
    /// Entries reported as recovered.
    pub recovered: Vec<String>,
    /// Free-form warning messages.
    pub warnings: Vec<String>,
    /// Named JSON statistics.
    pub stats: HashMap<String, Value>,
}
147
/// Private tally for one distillation batch.
///
/// NOTE(review): the code that updates these counters is not visible here.
#[derive(Debug, Default)]
struct DistillBatchReport {
    /// Count of items reported as distilled.
    distilled: usize,
    /// Count of items reported as failed.
    failed: usize,
}
153
/// Options passed to [`Curator::run`] to scope a curation run.
///
/// `Default` yields no origin, no skill name and `dry_run == false`.
/// NOTE(review): how each option is interpreted is decided by the curator
/// implementation, which is not visible here.
#[derive(Debug, Default, Clone)]
pub struct CurateScope {
    /// Optional origin value (presumably restricts the run to chunks with
    /// this origin — TODO confirm).
    pub origin: Option<String>,
    /// Optional skill name (presumably restricts the run to one skill — TODO confirm).
    pub skill_name: Option<String>,
    /// Dry-run flag (presumably report without modifying storage — TODO confirm).
    pub dry_run: bool,
}
164
/// Pluggable curation strategy.
///
/// `KnowledgeBase::open_with` accepts an optional implementation and uses
/// [`BuiltinCurator`] when none is supplied. Implementations must be
/// `Send + Sync` because they are stored as `Arc<dyn Curator>`.
pub trait Curator: Send + Sync {
    /// Runs curation against `kb` with the given `scope` and returns a report,
    /// or an error if the run fails.
    fn run(&self, kb: &KnowledgeBase, scope: &CurateScope) -> Result<CurateReport>;
}
170
/// Default [`Curator`]: a stateless unit struct that forwards to the
/// knowledge base's own built-in curation routine.
pub struct BuiltinCurator;

impl Curator for BuiltinCurator {
    /// Delegates to `KnowledgeBase::builtin_curate_impl` with the same scope.
    fn run(&self, kb: &KnowledgeBase, scope: &CurateScope) -> Result<CurateReport> {
        kb.builtin_curate_impl(scope)
    }
}
179
/// Knowledge base handle: a storage backend, the pluggable processing
/// components, and the tunable parameters loaded from the meta table.
///
/// Construct it with [`KnowledgeBase::open`] or [`KnowledgeBase::open_with`].
/// Every tunable field starts at its compiled-in default and is then refreshed
/// by `load_params` from the meta key noted on each group below.
pub struct KnowledgeBase {
    /// Storage backend (public so callers can reach it directly).
    pub storage: Storage,
    // Pluggable components; `open_with` supplies defaults for the first five
    // and `with_reranker` can replace the last one.
    embedding: Arc<dyn EmbeddingProvider>,
    refiner: Arc<dyn Refiner>,
    distiller: Arc<dyn Distiller>,
    curator: Arc<dyn Curator>,
    sanitizer: Arc<dyn Sanitizer>,
    reranker: Arc<dyn Reranker>,

    // `recall.*` meta keys.
    w_content: f64,
    w_trigger: f64,
    w_confidence: f64,
    w_context: f64,
    w_activation: f64,
    w_lexical: f64,
    top_k_candidates: usize,
    anti_trigger_penalty: f64,
    density_refill: bool,

    // `curate.*` and `evolve.*` meta keys.
    low_conf_threshold: f64,
    low_conf_idle_days: i64,
    repeat_select_min: i64,
    repeat_select_conf_max: f64,
    never_used_age_days: i64,
    open_ttl_days: i64,
    screening_timeout_minutes: i64,
    promote_used_success_min: i64,
    promote_confidence_min: f64,
    decay_floor: f64,
    evolve_threshold: i64,
    distill_batch_size: usize,
    evolve_schedule_interval_hours: i64,
    governance_archive_threshold: i64,
    negative_feedback_archive_threshold: i64,
    governance_evolve_threshold: i64,
    governance_proposal_max_age_days: i64,
    failure_min_uses: i64,
    failure_max_success_rate: f64,
    failure_confidence_max: f64,
    log_compact_days: i64,

    // `appraise.*`, `intuition.*` and `situation.*` meta keys, plus
    // `recall.embed_situation_signature`.
    appraise_tier_weak: f64,
    appraise_tier_strong: f64,
    appraise_min_strength: f64,
    appraise_top: usize,
    appraise_trigger_hit_min: f64,
    appraise_candidate_in_embed: bool,
    appraise_signature_floor: f64,
    appraise_min_evidence: i64,
    appraise_conflict_ceiling: f64,
    intuition_prior_m: f64,
    intuition_base_rate: f64,
    calibration_bins: i64,
    situation_coarse_keys: String,
    embed_situation_signature: bool,
}
244
245impl KnowledgeBase {
    /// Opens the knowledge base at `db_path` with every pluggable component
    /// left at its default (see [`KnowledgeBase::open_with`]).
    ///
    /// # Errors
    /// Propagates any error from `open_with`.
    pub fn open(db_path: impl AsRef<Path>) -> Result<Self> {
        Self::open_with(db_path, None, None, None, None, None)
    }
249
250 pub(crate) fn store_vec_content(&self, chunk_id: &str, cvec: &[f32]) -> Result<()> {
256 let want = self.embedding.content_dim();
257 if cvec.len() != want {
258 return Err(InnateError::InvalidState(format!(
259 "content embedding dim {} != configured {want} (chunk {chunk_id})",
260 cvec.len()
261 )));
262 }
263 self.storage
264 .insert_vec_content(chunk_id, &pack_embedding(cvec))
265 }
266
267 pub(crate) fn store_vec_trigger(&self, chunk_id: &str, tvec: &[f32]) -> Result<()> {
270 let want = self.embedding.trigger_dim();
271 if tvec.len() != want {
272 return Err(InnateError::InvalidState(format!(
273 "trigger embedding dim {} != configured {want} (chunk {chunk_id})",
274 tvec.len()
275 )));
276 }
277 self.storage
278 .insert_vec_trigger(chunk_id, &pack_embedding(tvec))
279 }
280
    /// Opens the knowledge base at `db_path`, optionally overriding the
    /// pluggable components.
    ///
    /// Each `None` argument is replaced by a default: `DummyEmbeddingProvider`,
    /// `NullRefiner`, `HeuristicDistiller`, `BuiltinCurator` and
    /// `DefaultSanitizer`. The reranker always starts as `NoopReranker`; use
    /// [`KnowledgeBase::with_reranker`] to change it.
    ///
    /// After the storage is opened with the provider's content and trigger
    /// dimensions, `init_meta` validates/seeds the meta table and `load_params`
    /// refreshes the tunables from it.
    ///
    /// # Errors
    /// Propagates errors from `Storage::open`, `init_meta` and `load_params`.
    pub fn open_with(
        db_path: impl AsRef<Path>,
        embedding: Option<Arc<dyn EmbeddingProvider>>,
        refiner: Option<Arc<dyn Refiner>>,
        distiller: Option<Arc<dyn Distiller>>,
        curator: Option<Arc<dyn Curator>>,
        sanitizer: Option<Arc<dyn Sanitizer>>,
    ) -> Result<Self> {
        // Fill in the default implementation for every component not supplied.
        let embedding = embedding.unwrap_or_else(|| Arc::new(DummyEmbeddingProvider::default()));
        let refiner = refiner.unwrap_or_else(|| Arc::new(NullRefiner));
        let distiller = distiller.unwrap_or_else(|| Arc::new(HeuristicDistiller));
        let curator = curator.unwrap_or_else(|| Arc::new(BuiltinCurator));
        let sanitizer = sanitizer.unwrap_or_else(|| Arc::new(DefaultSanitizer));
        let reranker: Arc<dyn Reranker> = Arc::new(NoopReranker);

        // The storage needs the embedding dimensions, so the provider must be
        // resolved before this call.
        let storage = Storage::open(db_path, embedding.content_dim(), embedding.trigger_dim())?;

        // Start from compiled-in defaults; `load_params` below overrides them
        // with whatever the meta table holds.
        let mut kb = Self {
            storage,
            embedding,
            refiner,
            distiller,
            curator,
            sanitizer,
            reranker,
            w_lexical: W_LEXICAL,
            embed_situation_signature: EMBED_SITUATION_SIGNATURE,
            w_content: W_CONTENT,
            w_trigger: W_TRIGGER,
            w_confidence: W_CONFIDENCE,
            w_context: W_CONTEXT,
            w_activation: W_ACTIVATION,
            top_k_candidates: TOP_K_CANDIDATES,
            anti_trigger_penalty: ANTI_TRIGGER_PENALTY,
            density_refill: DENSITY_REFILL,
            low_conf_threshold: LOW_CONF_THRESHOLD,
            low_conf_idle_days: LOW_CONF_IDLE_DAYS,
            repeat_select_min: REPEAT_SELECT_MIN,
            repeat_select_conf_max: REPEAT_SELECT_CONF_MAX,
            never_used_age_days: NEVER_USED_AGE_DAYS,
            open_ttl_days: OPEN_TTL_DAYS,
            screening_timeout_minutes: SCREENING_TIMEOUT_MINUTES,
            promote_used_success_min: PROMOTE_USED_SUCCESS_MIN,
            promote_confidence_min: PROMOTE_CONFIDENCE_MIN,
            decay_floor: DECAY_FLOOR,
            evolve_threshold: EVOLVE_THRESHOLD,
            distill_batch_size: DISTILL_BATCH_SIZE,
            evolve_schedule_interval_hours: 6,
            governance_archive_threshold: GOVERNANCE_ARCHIVE_THRESHOLD,
            negative_feedback_archive_threshold: NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD,
            governance_evolve_threshold: GOVERNANCE_EVOLVE_THRESHOLD,
            governance_proposal_max_age_days: 30,
            failure_min_uses: FAILURE_MIN_USES,
            failure_max_success_rate: FAILURE_MAX_SUCCESS_RATE,
            failure_confidence_max: FAILURE_CONFIDENCE_MAX,
            log_compact_days: LOG_COMPACT_DAYS,
            appraise_tier_weak: APPRAISE_TIER_WEAK,
            appraise_tier_strong: APPRAISE_TIER_STRONG,
            appraise_min_strength: APPRAISE_MIN_STRENGTH,
            appraise_top: APPRAISE_TOP,
            appraise_trigger_hit_min: APPRAISE_TRIGGER_HIT_MIN,
            appraise_candidate_in_embed: APPRAISE_CANDIDATE_IN_EMBED,
            appraise_signature_floor: APPRAISE_SIGNATURE_FLOOR,
            appraise_min_evidence: APPRAISE_MIN_EVIDENCE,
            appraise_conflict_ceiling: APPRAISE_CONFLICT_CEILING,
            intuition_prior_m: INTUITION_PRIOR_M,
            intuition_base_rate: INTUITION_BASE_RATE,
            calibration_bins: CALIBRATION_BINS,
            situation_coarse_keys: SITUATION_COARSE_KEYS.to_string(),
        };
        // Order matters: seed missing meta keys first, then read them back.
        kb.init_meta()?;
        kb.load_params()?;
        Ok(kb)
    }
355
    /// Builder-style setter: consumes `self`, replaces the reranker (which
    /// `open_with` initialises to `NoopReranker`) and returns the updated
    /// knowledge base.
    pub fn with_reranker(mut self, reranker: Arc<dyn Reranker>) -> Self {
        self.reranker = reranker;
        self
    }
363
    /// Validates the stored embedding dimensions and seeds missing meta keys.
    ///
    /// 1. If `content_dim` or `trigger_dim` is already stored, it must parse as
    ///    `usize` and equal the embedding provider's value.
    /// 2. Inside a `begin_immediate` transaction, every key of the defaults
    ///    table that is not yet present is written; existing keys are never
    ///    overwritten. The transaction is rolled back if any step fails.
    ///
    /// # Errors
    /// `InnateError::Other` for an unparsable or mismatching stored dimension;
    /// otherwise any error from the storage calls.
    fn init_meta(&self) -> Result<()> {
        // A fresh id is generated on every call but only stored when `lib_id`
        // is still missing (see the `is_none` check below).
        let lib_id = gen_uuid();
        let content_dim = self.embedding.content_dim().to_string();
        let trigger_dim = self.embedding.trigger_dim().to_string();
        let embed_model = self.embedding.model_name();

        // Reject a database whose stored dimensions differ from the provider's.
        for (key, expected) in [
            ("content_dim", self.embedding.content_dim()),
            ("trigger_dim", self.embedding.trigger_dim()),
        ] {
            if let Some(stored) = self.storage.get_meta(key)? {
                let actual = stored.parse::<usize>().map_err(|_| {
                    InnateError::Other(format!("invalid {key} metadata value: {stored}"))
                })?;
                if actual != expected {
                    return Err(InnateError::Other(format!(
                        "{key} mismatch: database uses {actual}, embedding provider uses {expected}"
                    )));
                }
            }
        }

        // Key/value pairs written only when the key is absent. The numeric
        // texts repeat the values of the compiled-in constants in this file.
        let defaults: &[(&str, &str)] = &[
            ("lib_id", &lib_id),
            ("lib_role", "personal"),
            ("schema_version", "4.14"),
            ("content_dim", &content_dim),
            ("trigger_dim", &trigger_dim),
            ("embed_model", embed_model),
            ("embed_version", "1"),
            ("vector_revision", "0"),
            ("last_agg_ts", "1970-01-01T00:00:00.000Z"),
            ("recall.w_content", "0.55"),
            ("recall.w_trigger", "0.25"),
            ("recall.w_confidence", "0.10"),
            ("recall.w_context", "0.15"),
            ("recall.w_activation", "0.08"),
            ("recall.w_lexical", "0.25"),
            ("recall.embed_situation_signature", "false"),
            ("recall.top_k_candidates", "20"),
            ("recall.anti_trigger_penalty", "0.6"),
            ("recall.density_refill", "true"),
            ("curate.low_conf_threshold", "0.25"),
            ("curate.low_conf_idle_days", "60"),
            ("curate.repeat_select_min", "10"),
            ("curate.repeat_select_conf_max", "0.5"),
            ("curate.never_used_age_days", "30"),
            ("curate.open_ttl_days", "14"),
            ("curate.screening_timeout_minutes", "30"),
            ("curate.promote_used_success_min", "3"),
            ("curate.promote_confidence_min", "0.60"),
            ("curate.decay_floor", "0.20"),
            ("evolve.threshold_new_count", "5"),
            ("evolve.distill_batch_size", "20"),
            ("evolve.schedule_interval_hours", "6"),
            ("curate.soft_mature_threshold", "5"),
            ("evolve.distill_token_window_hours", "24"),
            ("curate.governance_archive_threshold", "3"),
            ("curate.negative_feedback_archive_threshold", "5"),
            ("evolve.governance_pending_threshold", "3"),
            ("curate.governance_proposal_max_age_days", "30"),
            ("curate.failure_min_uses", "5"),
            ("curate.failure_max_success_rate", "0.20"),
            ("curate.failure_confidence_max", "0.35"),
            ("curate.log_compact_days", "30"),
            ("appraise.tier_weak", "0.30"),
            ("appraise.tier_strong", "0.65"),
            ("appraise.min_strength", "0.40"),
            ("appraise.top", "8"),
            ("appraise.trigger_hit_min", "0.50"),
            ("appraise.candidate_in_embed", "true"),
            ("appraise.signature_floor", "0.0"),
            ("appraise.min_evidence", "0"),
            ("appraise.conflict_ceiling", "1.0"),
            ("intuition.prior_m", "2.0"),
            ("intuition.base_rate", "0.5"),
            ("intuition.calibration_bins", "10"),
            ("situation.coarse_keys", "stage,error_class,file_type"),
        ];
        self.storage.begin_immediate()?;
        // The closure lets `?` short-circuit while still giving one place to
        // roll back on failure.
        let result = (|| -> Result<()> {
            for (k, v) in defaults {
                if self.storage.get_meta(k)?.is_none() {
                    self.storage.set_meta(k, v)?;
                }
            }
            self.storage.commit()
        })();
        if result.is_err() {
            // Best effort: the original error is what gets reported.
            let _ = self.storage.rollback();
        }
        result
    }
457
458 fn load_params(&mut self) -> Result<()> {
459 let f = |k: &str, d: f64| -> f64 {
460 self.storage
461 .get_meta(k)
462 .ok()
463 .flatten()
464 .and_then(|v| v.parse().ok())
465 .unwrap_or(d)
466 };
467 let i = |k: &str, d: i64| -> i64 {
468 self.storage
469 .get_meta(k)
470 .ok()
471 .flatten()
472 .and_then(|v| v.parse().ok())
473 .unwrap_or(d)
474 };
475 let b = |k: &str, d: bool| -> bool {
476 self.storage
477 .get_meta(k)
478 .ok()
479 .flatten()
480 .map(|v| v.to_lowercase() == "true")
481 .unwrap_or(d)
482 };
483 self.w_content = f("recall.w_content", W_CONTENT);
484 self.w_trigger = f("recall.w_trigger", W_TRIGGER);
485 self.w_confidence = f("recall.w_confidence", W_CONFIDENCE);
486 self.w_context = f("recall.w_context", W_CONTEXT);
487 self.w_lexical = f("recall.w_lexical", W_LEXICAL);
488 self.embed_situation_signature =
489 b("recall.embed_situation_signature", EMBED_SITUATION_SIGNATURE);
490 self.w_activation = f("recall.w_activation", W_ACTIVATION);
491 self.top_k_candidates =
492 i("recall.top_k_candidates", TOP_K_CANDIDATES as i64).max(1) as usize;
493 self.anti_trigger_penalty = f("recall.anti_trigger_penalty", ANTI_TRIGGER_PENALTY);
494 self.density_refill = b("recall.density_refill", DENSITY_REFILL);
495 self.low_conf_threshold = f("curate.low_conf_threshold", LOW_CONF_THRESHOLD);
496 self.low_conf_idle_days = i("curate.low_conf_idle_days", LOW_CONF_IDLE_DAYS);
497 self.repeat_select_min = i("curate.repeat_select_min", REPEAT_SELECT_MIN);
498 self.repeat_select_conf_max = f("curate.repeat_select_conf_max", REPEAT_SELECT_CONF_MAX);
499 self.never_used_age_days = i("curate.never_used_age_days", NEVER_USED_AGE_DAYS);
500 self.open_ttl_days = i("curate.open_ttl_days", OPEN_TTL_DAYS);
501 self.screening_timeout_minutes = i(
502 "curate.screening_timeout_minutes",
503 SCREENING_TIMEOUT_MINUTES,
504 );
505 self.promote_used_success_min =
506 i("curate.promote_used_success_min", PROMOTE_USED_SUCCESS_MIN);
507 self.promote_confidence_min = f("curate.promote_confidence_min", PROMOTE_CONFIDENCE_MIN);
508 self.decay_floor = f("curate.decay_floor", DECAY_FLOOR).clamp(0.0, 0.4);
509 self.evolve_threshold = i("evolve.threshold_new_count", EVOLVE_THRESHOLD);
510 self.distill_batch_size =
511 i("evolve.distill_batch_size", DISTILL_BATCH_SIZE as i64) as usize;
512 self.evolve_schedule_interval_hours = i("evolve.schedule_interval_hours", 6).max(1);
513 self.governance_archive_threshold = i(
514 "curate.governance_archive_threshold",
515 GOVERNANCE_ARCHIVE_THRESHOLD,
516 )
517 .max(1);
518 self.negative_feedback_archive_threshold = i(
519 "curate.negative_feedback_archive_threshold",
520 NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD,
521 )
522 .max(1);
523 self.governance_evolve_threshold = i(
524 "evolve.governance_pending_threshold",
525 GOVERNANCE_EVOLVE_THRESHOLD,
526 )
527 .max(1);
528 self.governance_proposal_max_age_days =
529 i("curate.governance_proposal_max_age_days", 30).max(1);
530 self.failure_min_uses = i("curate.failure_min_uses", FAILURE_MIN_USES).max(1);
531 self.failure_max_success_rate =
532 f("curate.failure_max_success_rate", FAILURE_MAX_SUCCESS_RATE).clamp(0.0, 1.0);
533 self.failure_confidence_max =
534 f("curate.failure_confidence_max", FAILURE_CONFIDENCE_MAX).clamp(0.0, 1.0);
535 self.log_compact_days = i("curate.log_compact_days", LOG_COMPACT_DAYS).max(1);
536 let s = |k: &str, d: &str| -> String {
537 self.storage
538 .get_meta(k)
539 .ok()
540 .flatten()
541 .filter(|v| !v.trim().is_empty())
542 .unwrap_or_else(|| d.to_string())
543 };
544 self.appraise_tier_weak = f("appraise.tier_weak", APPRAISE_TIER_WEAK).clamp(0.0, 1.0);
545 self.appraise_tier_strong = f("appraise.tier_strong", APPRAISE_TIER_STRONG).clamp(0.0, 1.0);
546 self.appraise_min_strength =
547 f("appraise.min_strength", APPRAISE_MIN_STRENGTH).clamp(0.0, 1.0);
548 self.appraise_top = i("appraise.top", APPRAISE_TOP as i64).max(1) as usize;
549 self.appraise_trigger_hit_min =
550 f("appraise.trigger_hit_min", APPRAISE_TRIGGER_HIT_MIN).clamp(0.0, 1.0);
551 self.appraise_candidate_in_embed =
552 b("appraise.candidate_in_embed", APPRAISE_CANDIDATE_IN_EMBED);
553 self.appraise_signature_floor =
554 f("appraise.signature_floor", APPRAISE_SIGNATURE_FLOOR).clamp(0.0, 1.0);
555 self.appraise_min_evidence = i("appraise.min_evidence", APPRAISE_MIN_EVIDENCE).max(0);
556 self.appraise_conflict_ceiling =
557 f("appraise.conflict_ceiling", APPRAISE_CONFLICT_CEILING).clamp(0.0, 1.0);
558 self.intuition_prior_m = f("intuition.prior_m", INTUITION_PRIOR_M).max(0.0);
559 self.intuition_base_rate = f("intuition.base_rate", INTUITION_BASE_RATE).clamp(0.0, 1.0);
560 self.calibration_bins = i("intuition.calibration_bins", CALIBRATION_BINS).clamp(2, 100);
561 self.situation_coarse_keys = s("situation.coarse_keys", SITUATION_COARSE_KEYS);
562 Ok(())
563 }
564}
565
/// A chunk together with three similarity values, all `0.0` when created by
/// `new_candidate`.
///
/// NOTE(review): where the similarities are filled in and how they are
/// combined is not visible here; presumably the recall scoring code — confirm.
struct CandidateInfo {
    /// The chunk as a JSON value.
    chunk: Value,
    /// Similarity value for the content channel.
    sim_content: f32,
    /// Similarity value for the trigger channel.
    sim_trigger: f32,
    /// Similarity value for the lexical channel.
    sim_lexical: f32,
}
578
/// Returns `true` when at least one `key=value` segment of `sig` carries a
/// meaningful value.
///
/// `sig` is split on `|`. A segment counts only if it contains `=` and the
/// text after the first `=` is neither empty, `"none"`, nor `"unknown"`.
fn signature_has_signal(sig: &str) -> bool {
    for segment in sig.split('|') {
        // Segments without `=` carry no value at all.
        let Some((_, value)) = segment.split_once('=') else {
            continue;
        };
        if !matches!(value, "" | "none" | "unknown") {
            return true;
        }
    }
    false
}
589
590fn new_candidate(chunk: &Value) -> CandidateInfo {
593 CandidateInfo {
594 chunk: chunk.clone(),
595 sim_content: 0.0,
596 sim_trigger: 0.0,
597 sim_lexical: 0.0,
598 }
599}
600
601fn chunk_is_valid_for_recall(chunk: &Value, embed_version: i64) -> bool {
602 chunk.get("state").and_then(Value::as_str) != Some("archived")
603 && chunk.get("origin").and_then(Value::as_str) != Some("spark")
604 && chunk
605 .get("embed_version")
606 .and_then(Value::as_i64)
607 .unwrap_or(1)
608 >= embed_version
609}
610
/// Reduces `query` to a canonical bag-of-words string.
///
/// Steps: lowercase; replace every character that is neither alphanumeric nor
/// whitespace with a space; split on whitespace; drop the stop words listed
/// below; sort the remaining tokens, remove duplicates and join them with
/// single spaces. Word order in the input therefore does not affect the
/// result.
fn normalize_query(query: &str) -> String {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "for", "in", "of", "on", "the", "to", "with",
    ];

    let mut cleaned = String::with_capacity(query.len());
    for ch in query.to_lowercase().chars() {
        let keep = ch.is_alphanumeric() || ch.is_whitespace();
        cleaned.push(if keep { ch } else { ' ' });
    }

    // An ordered set gives the sorted, duplicate-free token list in one step.
    let unique: std::collections::BTreeSet<&str> = cleaned
        .split_whitespace()
        .filter(|word| !STOP_WORDS.contains(word))
        .collect();

    unique.into_iter().collect::<Vec<&str>>().join(" ")
}
642
643fn estimate_distill_prompt_tokens(log: &Value, related_logs: &[Value]) -> i64 {
644 let primary: i64 = [
645 "query",
646 "recall_snapshot",
647 "output",
648 "output_summary",
649 "nomination",
650 ]
651 .iter()
652 .filter_map(|key| log.get(*key).and_then(Value::as_str))
653 .map(|text| estimate_tokens(text) as i64)
654 .sum();
655 let log_id = log.get("id").and_then(Value::as_str).unwrap_or("");
656 let context_key = log.get("context_key").and_then(Value::as_str);
657 let related: i64 = related_logs
658 .iter()
659 .filter(|other| other.get("id").and_then(Value::as_str).unwrap_or("") != log_id)
660 .filter(|other| {
661 context_key.is_some() && other.get("context_key").and_then(Value::as_str) == context_key
662 })
663 .take(4)
664 .flat_map(|other| {
665 ["query", "output_summary", "outcome"]
666 .into_iter()
667 .filter_map(|key| other.get(key).and_then(Value::as_str))
668 })
669 .map(|text| estimate_tokens(text) as i64)
670 .sum();
671 primary + related
672}
673
674fn estimate_distilled_chunk_tokens(chunk: &DistilledChunk) -> i64 {
675 estimate_tokens(&chunk.content) as i64
676 + chunk
677 .trigger_desc
678 .as_deref()
679 .map(estimate_tokens)
680 .unwrap_or(0) as i64
681 + chunk
682 .anti_trigger_desc
683 .as_deref()
684 .map(estimate_tokens)
685 .unwrap_or(0) as i64
686}
687
/// Reports whether `query` contains any of the comma-separated phrases in
/// `anti`.
///
/// Both inputs are lowercased; each phrase is trimmed, blank phrases are
/// ignored, and matching is by plain substring.
fn anti_trigger_hit(query: &str, anti: &str) -> bool {
    let haystack = query.to_lowercase();
    let phrases = anti.to_lowercase();
    for raw in phrases.split(',') {
        let needle = raw.trim();
        if needle.is_empty() {
            continue;
        }
        if haystack.contains(needle) {
            return true;
        }
    }
    false
}
695
696fn block_cost(block: &[Value]) -> usize {
697 block
698 .iter()
699 .map(|b| {
700 b.get("token_count")
701 .and_then(Value::as_u64)
702 .map(|t| t as usize)
703 .unwrap_or_else(|| {
704 estimate_tokens(b.get("content").and_then(Value::as_str).unwrap_or("")).max(100)
705 })
706 })
707 .sum()
708}
709
710fn limit_knowledge(knowledge: Vec<Value>, top: Option<usize>) -> Vec<Value> {
711 match top {
712 None => knowledge,
713 Some(0) => vec![],
714 Some(n) => knowledge.into_iter().take(n).collect(),
715 }
716}
717
/// Classifies a "used" list into a fixed label: `"unknown"` for `None`,
/// `"known_none"` for an empty slice and `"known_some"` otherwise.
fn usage_state(used: Option<&[String]>) -> &'static str {
    let Some(entries) = used else {
        return "unknown";
    };
    if entries.is_empty() {
        "known_none"
    } else {
        "known_some"
    }
}
725
/// Returns `numerator / denominator` rounded to three decimal places, or `0.0`
/// when `denominator` is zero or negative.
fn ratio(numerator: i64, denominator: i64) -> f64 {
    if denominator > 0 {
        let quotient = numerator as f64 / denominator as f64;
        (quotient * 1000.0).round() / 1000.0
    } else {
        0.0
    }
}
733
734fn validate_source(source: &str) -> Result<()> {
735 if !matches!(
736 source,
737 "mcp" | "sdk" | "cli" | "hook" | "daemon" | "augmented"
738 ) {
739 return Err(InnateError::InvalidState(format!(
740 "invalid event source: {source}"
741 )));
742 }
743 Ok(())
744}
745
746fn count_query(storage: &Storage, sql: &str) -> Result<i64> {
747 Ok(storage
748 .query_chunks(sql)?
749 .first()
750 .and_then(|r| r.as_object())
751 .and_then(|m| m.values().next())
752 .and_then(Value::as_i64)
753 .unwrap_or(0))
754}
755
756fn count_query_params<P: rusqlite::Params>(storage: &Storage, sql: &str, p: P) -> Result<i64> {
757 Ok(storage
758 .query_chunks_params(sql, p)?
759 .first()
760 .and_then(|r| r.as_object())
761 .and_then(|m| m.values().next())
762 .and_then(Value::as_i64)
763 .unwrap_or(0))
764}
765
766fn days_ago(now_iso: &str, days: i64) -> String {
767 use chrono::{DateTime, Duration, Utc};
768 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
769 let cutoff = t - Duration::days(days);
770 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
771 }
772 now_iso.to_string()
773}
774
775fn minutes_ago(now_iso: &str, minutes: i64) -> String {
776 use chrono::{DateTime, Duration, Utc};
777 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
778 let cutoff = t - Duration::minutes(minutes);
779 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
780 }
781 now_iso.to_string()
782}
783
784fn hours_ago(now_iso: &str, hours: i64) -> String {
785 use chrono::{DateTime, Duration, Utc};
786 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
787 let cutoff = t - Duration::hours(hours);
788 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
789 }
790 now_iso.to_string()
791}
792
793fn minutes_after(now_iso: &str, minutes: i64) -> String {
794 use chrono::{DateTime, Duration, Utc};
795 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
796 let cutoff = t + Duration::minutes(minutes);
797 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
798 }
799 now_iso.to_string()
800}
801
802fn hours_after(now_iso: &str, hours: i64) -> String {
803 use chrono::{DateTime, Duration, Utc};
804 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
805 let cutoff = t + Duration::hours(hours);
806 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
807 }
808 now_iso.to_string()
809}
810
811fn iso_days_diff(now_iso: &str, past_iso: &str) -> i64 {
813 use chrono::{DateTime, Utc};
814 let parse = |s: &str| s.parse::<DateTime<Utc>>().ok();
815 if let (Some(a), Some(b)) = (parse(now_iso), parse(past_iso)) {
816 let diff = a - b;
817 diff.num_days().max(0)
818 } else {
819 0
820 }
821}
822
823fn iso_fractional_days(now_iso: &str, past_iso: &str) -> f64 {
826 use chrono::{DateTime, Utc};
827 let parse = |s: &str| s.parse::<DateTime<Utc>>().ok();
828 if let (Some(a), Some(b)) = (parse(now_iso), parse(past_iso)) {
829 ((a - b).num_seconds().max(0)) as f64 / 86_400.0
830 } else {
831 0.0
832 }
833}
834
835const ACTR_DECAY: f64 = 0.5;
837
838pub(super) fn actr_activation(used_count: i64, last_used_at: Option<&str>, now_iso: &str) -> f64 {
851 if used_count <= 0 {
852 return 0.0;
853 }
854 let Some(last) = last_used_at else {
855 return 0.0;
856 };
857 let recency_days = iso_fractional_days(now_iso, last);
858 let b = (1.0 + used_count as f64).ln() - ACTR_DECAY * (1.0 + recency_days).ln();
859 1.0 / (1.0 + (-b).exp())
860}
861
862fn detect_cycles(deps: &[Value]) -> Vec<Vec<String>> {
864 use std::collections::HashMap;
865 let mut adj: HashMap<String, Vec<String>> = HashMap::new();
866 for d in deps {
867 let src = d
868 .get("src")
869 .and_then(Value::as_str)
870 .unwrap_or("")
871 .to_string();
872 let dst = d
873 .get("dst")
874 .and_then(Value::as_str)
875 .unwrap_or("")
876 .to_string();
877 if !src.is_empty() && !dst.is_empty() {
878 adj.entry(src).or_default().push(dst);
879 }
880 }
881 let nodes: Vec<String> = adj.keys().cloned().collect();
882 let mut visited: HashSet<String> = HashSet::new();
883 let mut on_stack: HashSet<String> = HashSet::new();
884 let mut cycles: Vec<Vec<String>> = vec![];
885
886 fn dfs(
887 node: &str,
888 adj: &HashMap<String, Vec<String>>,
889 visited: &mut HashSet<String>,
890 on_stack: &mut HashSet<String>,
891 path: &mut Vec<String>,
892 cycles: &mut Vec<Vec<String>>,
893 ) {
894 if on_stack.contains(node) {
895 let start = path.iter().position(|n| n == node).unwrap_or(0);
897 cycles.push(path[start..].to_vec());
898 return;
899 }
900 if visited.contains(node) {
901 return;
902 }
903 visited.insert(node.to_string());
904 on_stack.insert(node.to_string());
905 path.push(node.to_string());
906 if let Some(children) = adj.get(node) {
907 for child in children {
908 dfs(child, adj, visited, on_stack, path, cycles);
909 }
910 }
911 path.pop();
912 on_stack.remove(node);
913 }
914
915 for node in nodes {
916 let mut path = vec![];
917 dfs(
918 &node,
919 &adj,
920 &mut visited,
921 &mut on_stack,
922 &mut path,
923 &mut cycles,
924 );
925 }
926 cycles
927}