1type PackResult = (
5 Vec<Value>,
6 Vec<(Vec<Value>, f64, usize)>,
7 std::collections::HashMap<String, String>,
8);
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::Arc;
13
14use serde_json::{json, Value};
15
16use crate::embedding::{DummyEmbeddingProvider, EmbeddingProvider};
17use crate::errors::{InnateError, Result};
18use crate::refine::{
19 DefaultSanitizer, DistilledChunk, Distiller, HeuristicDistiller, NoopReranker, NullRefiner,
20 Refiner, Reranker, Sanitizer,
21};
22use crate::storage::{ChunkRow, EpisodicLogRow, Storage};
23use crate::utils::{
24 agent_source, content_hash, estimate_tokens, gen_uuid, pack_embedding, utc_now_iso,
25 SanitizeAction,
26};
27
28mod appraise;
29mod curate;
30mod evolve;
31mod inspection;
32mod lifecycle;
33mod recall;
34mod record;
35mod repair;
36mod situation;
37
38pub use appraise::{
39 AbstainReason, AppraiseParams, Contributor, FlaggedPoint, Tier, Valence, Verdict,
40 APPRAISE_ADVISORY,
41};
42pub use recall::RecallParams;
43pub use record::RecordParams;
44pub use repair::TraceRepairReport;
45pub use situation::Situation;
46
// Compiled-in defaults for the `recall.*` settings. `KnowledgeBase::load_params`
// falls back to these when the metadata table has no parseable value stored.

/// Default for `recall.w_content`.
const W_CONTENT: f64 = 0.55;
/// Default for `recall.w_trigger`.
const W_TRIGGER: f64 = 0.25;
/// Default for `recall.w_confidence`.
const W_CONFIDENCE: f64 = 0.10;
/// Default for `recall.w_context`.
const W_CONTEXT: f64 = 0.15;
/// Default for `recall.w_activation`.
const W_ACTIVATION: f64 = 0.08;
/// Default for `recall.w_lexical`.
const W_LEXICAL: f64 = 0.25;
/// Default for `recall.top_k_candidates` (the loaded value is floored at 1).
const TOP_K_CANDIDATES: usize = 20;
/// Default for `recall.anti_trigger_penalty`.
const ANTI_TRIGGER_PENALTY: f64 = 0.6;
/// Default for `recall.density_refill`.
const DENSITY_REFILL: bool = true;
66
// Compiled-in defaults for the `curate.*` / `evolve.*` settings read by
// `KnowledgeBase::load_params`.

/// Default for `curate.low_conf_threshold`.
const LOW_CONF_THRESHOLD: f64 = 0.25;
/// Default for `curate.low_conf_idle_days`.
const LOW_CONF_IDLE_DAYS: i64 = 60;
/// Default for `curate.repeat_select_min`.
const REPEAT_SELECT_MIN: i64 = 10;
/// Default for `curate.repeat_select_conf_max`.
const REPEAT_SELECT_CONF_MAX: f64 = 0.5;
/// Default for `curate.never_used_age_days`.
const NEVER_USED_AGE_DAYS: i64 = 30;
/// Default for `curate.open_ttl_days`.
const OPEN_TTL_DAYS: i64 = 14;
/// Default for `curate.screening_timeout_minutes`.
const SCREENING_TIMEOUT_MINUTES: i64 = 30;
/// Default for `curate.promote_used_success_min`.
const PROMOTE_USED_SUCCESS_MIN: i64 = 3;
/// Default for `curate.promote_confidence_min`.
const PROMOTE_CONFIDENCE_MIN: f64 = 0.60;
/// Default for `curate.decay_floor` (the loaded value is clamped to `0.0..=0.4`).
const DECAY_FLOOR: f64 = 0.20;
/// Default for `evolve.threshold_new_count`.
const EVOLVE_THRESHOLD: i64 = 5;
/// Default for `evolve.distill_batch_size`.
const DISTILL_BATCH_SIZE: usize = 20;
/// NOTE(review): not read in this file — presumably a score penalty applied by
/// the recall submodule to pending chunks; confirm at the use site.
const PENDING_RECALL_PENALTY: f64 = 0.60;
80
// Compiled-in defaults for the `appraise.*`, `intuition.*`, `situation.*` and
// governance/failure settings read by `KnowledgeBase::load_params`.

/// Default for `appraise.tier_weak` (loaded value clamped to `0.0..=1.0`).
const APPRAISE_TIER_WEAK: f64 = 0.30;
/// Default for `appraise.tier_strong` (loaded value clamped to `0.0..=1.0`).
const APPRAISE_TIER_STRONG: f64 = 0.65;
/// Default for `appraise.min_strength` (loaded value clamped to `0.0..=1.0`).
const APPRAISE_MIN_STRENGTH: f64 = 0.40;
/// Default for `appraise.top` (loaded value floored at 1).
const APPRAISE_TOP: usize = 8;
/// Default for `appraise.trigger_hit_min` (loaded value clamped to `0.0..=1.0`).
const APPRAISE_TRIGGER_HIT_MIN: f64 = 0.50;
/// Default for `appraise.candidate_in_embed`.
const APPRAISE_CANDIDATE_IN_EMBED: bool = true;
/// Default for `appraise.signature_floor` (loaded value clamped to `0.0..=1.0`).
const APPRAISE_SIGNATURE_FLOOR: f64 = 0.0;
/// Default for `appraise.min_evidence` (loaded value floored at 0).
const APPRAISE_MIN_EVIDENCE: i64 = 0;
/// Default for `appraise.conflict_ceiling` (loaded value clamped to `0.0..=1.0`).
const APPRAISE_CONFLICT_CEILING: f64 = 1.0;
/// Default for `intuition.prior_m` (loaded value floored at 0.0).
const INTUITION_PRIOR_M: f64 = 2.0;
/// Default for `intuition.base_rate` (loaded value clamped to `0.0..=1.0`).
const INTUITION_BASE_RATE: f64 = 0.5;
/// NOTE(review): not read in this file — presumably the recall-side
/// counterpart of `INTUITION_PRIOR_M`; confirm at the use site.
const RECALL_PRIOR_M: f64 = 2.0;
/// NOTE(review): not read in this file — presumably the recall-side
/// counterpart of `INTUITION_BASE_RATE`; confirm at the use site.
const RECALL_BASE_RATE: f64 = 0.5;
/// Default for `intuition.calibration_bins` (loaded value clamped to `2..=100`).
const CALIBRATION_BINS: i64 = 10;
/// Default for `situation.coarse_keys`, stored as a comma-separated list.
const SITUATION_COARSE_KEYS: &str = "stage,error_class,file_type";
/// Default for `recall.embed_situation_signature`.
const EMBED_SITUATION_SIGNATURE: bool = false;
/// Default for `curate.governance_archive_threshold` (loaded value floored at 1).
const GOVERNANCE_ARCHIVE_THRESHOLD: i64 = 3;
/// Default for `curate.negative_feedback_archive_threshold` (loaded value floored at 1).
const NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD: i64 = 5;
/// Default for `evolve.governance_pending_threshold` (loaded value floored at 1).
const GOVERNANCE_EVOLVE_THRESHOLD: i64 = 3;
/// Default for `curate.failure_min_uses` (loaded value floored at 1).
const FAILURE_MIN_USES: i64 = 5;
/// Default for `curate.failure_max_success_rate` (loaded value clamped to `0.0..=1.0`).
const FAILURE_MAX_SUCCESS_RATE: f64 = 0.20;
/// Default for `curate.failure_confidence_max` (loaded value clamped to `0.0..=1.0`).
const FAILURE_CONFIDENCE_MAX: f64 = 0.35;
/// Default for `curate.log_compact_days` (loaded value floored at 1).
const LOG_COMPACT_DAYS: i64 = 30;
122
123#[derive(Debug, Default, Clone)]
128pub struct RecallResult {
129 pub knowledge: Vec<Value>,
130 pub sparks: Vec<Value>,
131 pub trace_id: String,
132 pub empty: bool,
133 pub depth_skipped: Vec<String>,
134 pub skipped_reasons: HashMap<String, String>,
135}
136
137#[derive(Debug, Default)]
138pub struct CurateReport {
139 pub archived: Vec<String>,
140 pub deduped: Vec<String>,
141 pub decayed: Vec<String>,
142 pub cycles: Vec<Vec<String>>,
143 pub orphans: Vec<String>,
144 pub recovered: Vec<String>,
145 pub warnings: Vec<String>,
146 pub stats: HashMap<String, Value>,
147}
148
/// Counters for one distillation batch; both start at zero via `Default`.
#[derive(Debug, Default)]
struct DistillBatchReport {
    /// Number of items counted as distilled.
    distilled: usize,
    /// Number of items counted as failed.
    failed: usize,
}
154
/// Options passed to [`Curator::run`] to narrow or simulate a curation pass.
///
/// The `Default` value has no filters set and `dry_run == false`.
#[derive(Debug, Default, Clone)]
pub struct CurateScope {
    /// Optional origin filter.
    /// NOTE(review): presumably restricts the pass to chunks with this origin — confirm.
    pub origin: Option<String>,
    /// Optional skill-name filter.
    /// NOTE(review): presumably restricts the pass to one skill — confirm.
    pub skill_name: Option<String>,
    /// Dry-run flag.
    /// NOTE(review): presumably reports without persisting changes — confirm.
    pub dry_run: bool,
}
165
166pub trait Curator: Send + Sync {
169 fn run(&self, kb: &KnowledgeBase, scope: &CurateScope) -> Result<CurateReport>;
170}
171
172pub struct BuiltinCurator;
174
175impl Curator for BuiltinCurator {
176 fn run(&self, kb: &KnowledgeBase, scope: &CurateScope) -> Result<CurateReport> {
177 kb.builtin_curate_impl(scope)
178 }
179}
180
181pub struct KnowledgeBase {
186 pub storage: Storage,
187 embedding: Arc<dyn EmbeddingProvider>,
188 refiner: Arc<dyn Refiner>,
189 distiller: Arc<dyn Distiller>,
190 curator: Arc<dyn Curator>,
191 sanitizer: Arc<dyn Sanitizer>,
192 reranker: Arc<dyn Reranker>,
195
196 w_content: f64,
198 w_trigger: f64,
199 w_confidence: f64,
200 w_context: f64,
201 w_activation: f64,
202 w_lexical: f64,
203 top_k_candidates: usize,
204 anti_trigger_penalty: f64,
205 density_refill: bool,
206
207 low_conf_threshold: f64,
208 low_conf_idle_days: i64,
209 repeat_select_min: i64,
210 repeat_select_conf_max: f64,
211 never_used_age_days: i64,
212 open_ttl_days: i64,
213 screening_timeout_minutes: i64,
214 promote_used_success_min: i64,
215 promote_confidence_min: f64,
216 decay_floor: f64,
217 evolve_threshold: i64,
218 distill_batch_size: usize,
219 evolve_schedule_interval_hours: i64,
220 governance_archive_threshold: i64,
221 negative_feedback_archive_threshold: i64,
222 governance_evolve_threshold: i64,
223 governance_proposal_max_age_days: i64,
224 failure_min_uses: i64,
225 failure_max_success_rate: f64,
226 failure_confidence_max: f64,
227 log_compact_days: i64,
228
229 appraise_tier_weak: f64,
231 appraise_tier_strong: f64,
232 appraise_min_strength: f64,
233 appraise_top: usize,
234 appraise_trigger_hit_min: f64,
235 appraise_candidate_in_embed: bool,
236 appraise_signature_floor: f64,
237 appraise_min_evidence: i64,
238 appraise_conflict_ceiling: f64,
239 intuition_prior_m: f64,
240 intuition_base_rate: f64,
241 calibration_bins: i64,
242 situation_coarse_keys: String,
243 embed_situation_signature: bool,
244}
245
246impl KnowledgeBase {
247 pub fn open(db_path: impl AsRef<Path>) -> Result<Self> {
248 Self::open_with(db_path, None, None, None, None, None)
249 }
250
251 pub(crate) fn store_vec_content(&self, chunk_id: &str, cvec: &[f32]) -> Result<()> {
257 let want = self.embedding.content_dim();
258 if cvec.len() != want {
259 return Err(InnateError::InvalidState(format!(
260 "content embedding dim {} != configured {want} (chunk {chunk_id})",
261 cvec.len()
262 )));
263 }
264 self.storage
265 .insert_vec_content(chunk_id, &pack_embedding(cvec))
266 }
267
268 pub(crate) fn store_vec_trigger(&self, chunk_id: &str, tvec: &[f32]) -> Result<()> {
271 let want = self.embedding.trigger_dim();
272 if tvec.len() != want {
273 return Err(InnateError::InvalidState(format!(
274 "trigger embedding dim {} != configured {want} (chunk {chunk_id})",
275 tvec.len()
276 )));
277 }
278 self.storage
279 .insert_vec_trigger(chunk_id, &pack_embedding(tvec))
280 }
281
282 pub fn open_with(
283 db_path: impl AsRef<Path>,
284 embedding: Option<Arc<dyn EmbeddingProvider>>,
285 refiner: Option<Arc<dyn Refiner>>,
286 distiller: Option<Arc<dyn Distiller>>,
287 curator: Option<Arc<dyn Curator>>,
288 sanitizer: Option<Arc<dyn Sanitizer>>,
289 ) -> Result<Self> {
290 let embedding = embedding.unwrap_or_else(|| Arc::new(DummyEmbeddingProvider::default()));
291 let refiner = refiner.unwrap_or_else(|| Arc::new(NullRefiner));
292 let distiller = distiller.unwrap_or_else(|| Arc::new(HeuristicDistiller));
293 let curator = curator.unwrap_or_else(|| Arc::new(BuiltinCurator));
294 let sanitizer = sanitizer.unwrap_or_else(|| Arc::new(DefaultSanitizer));
295 let reranker: Arc<dyn Reranker> = Arc::new(NoopReranker);
296
297 let storage = Storage::open(db_path, embedding.content_dim(), embedding.trigger_dim())?;
298
299 let mut kb = Self {
300 storage,
301 embedding,
302 refiner,
303 distiller,
304 curator,
305 sanitizer,
306 reranker,
307 w_lexical: W_LEXICAL,
308 embed_situation_signature: EMBED_SITUATION_SIGNATURE,
309 w_content: W_CONTENT,
310 w_trigger: W_TRIGGER,
311 w_confidence: W_CONFIDENCE,
312 w_context: W_CONTEXT,
313 w_activation: W_ACTIVATION,
314 top_k_candidates: TOP_K_CANDIDATES,
315 anti_trigger_penalty: ANTI_TRIGGER_PENALTY,
316 density_refill: DENSITY_REFILL,
317 low_conf_threshold: LOW_CONF_THRESHOLD,
318 low_conf_idle_days: LOW_CONF_IDLE_DAYS,
319 repeat_select_min: REPEAT_SELECT_MIN,
320 repeat_select_conf_max: REPEAT_SELECT_CONF_MAX,
321 never_used_age_days: NEVER_USED_AGE_DAYS,
322 open_ttl_days: OPEN_TTL_DAYS,
323 screening_timeout_minutes: SCREENING_TIMEOUT_MINUTES,
324 promote_used_success_min: PROMOTE_USED_SUCCESS_MIN,
325 promote_confidence_min: PROMOTE_CONFIDENCE_MIN,
326 decay_floor: DECAY_FLOOR,
327 evolve_threshold: EVOLVE_THRESHOLD,
328 distill_batch_size: DISTILL_BATCH_SIZE,
329 evolve_schedule_interval_hours: 6,
330 governance_archive_threshold: GOVERNANCE_ARCHIVE_THRESHOLD,
331 negative_feedback_archive_threshold: NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD,
332 governance_evolve_threshold: GOVERNANCE_EVOLVE_THRESHOLD,
333 governance_proposal_max_age_days: 30,
334 failure_min_uses: FAILURE_MIN_USES,
335 failure_max_success_rate: FAILURE_MAX_SUCCESS_RATE,
336 failure_confidence_max: FAILURE_CONFIDENCE_MAX,
337 log_compact_days: LOG_COMPACT_DAYS,
338 appraise_tier_weak: APPRAISE_TIER_WEAK,
339 appraise_tier_strong: APPRAISE_TIER_STRONG,
340 appraise_min_strength: APPRAISE_MIN_STRENGTH,
341 appraise_top: APPRAISE_TOP,
342 appraise_trigger_hit_min: APPRAISE_TRIGGER_HIT_MIN,
343 appraise_candidate_in_embed: APPRAISE_CANDIDATE_IN_EMBED,
344 appraise_signature_floor: APPRAISE_SIGNATURE_FLOOR,
345 appraise_min_evidence: APPRAISE_MIN_EVIDENCE,
346 appraise_conflict_ceiling: APPRAISE_CONFLICT_CEILING,
347 intuition_prior_m: INTUITION_PRIOR_M,
348 intuition_base_rate: INTUITION_BASE_RATE,
349 calibration_bins: CALIBRATION_BINS,
350 situation_coarse_keys: SITUATION_COARSE_KEYS.to_string(),
351 };
352 kb.init_meta()?;
353 kb.load_params()?;
354 Ok(kb)
355 }
356
357 pub fn with_reranker(mut self, reranker: Arc<dyn Reranker>) -> Self {
361 self.reranker = reranker;
362 self
363 }
364
365 fn init_meta(&self) -> Result<()> {
366 let lib_id = gen_uuid();
367 let content_dim = self.embedding.content_dim().to_string();
368 let trigger_dim = self.embedding.trigger_dim().to_string();
369 let embed_model = self.embedding.model_name();
370
371 for (key, expected) in [
372 ("content_dim", self.embedding.content_dim()),
373 ("trigger_dim", self.embedding.trigger_dim()),
374 ] {
375 if let Some(stored) = self.storage.get_meta(key)? {
376 let actual = stored.parse::<usize>().map_err(|_| {
377 InnateError::Other(format!("invalid {key} metadata value: {stored}"))
378 })?;
379 if actual != expected {
380 return Err(InnateError::Other(format!(
381 "{key} mismatch: database uses {actual}, embedding provider uses {expected}"
382 )));
383 }
384 }
385 }
386
387 let defaults: &[(&str, &str)] = &[
388 ("lib_id", &lib_id),
389 ("lib_role", "personal"),
390 ("schema_version", "4.14"),
391 ("content_dim", &content_dim),
392 ("trigger_dim", &trigger_dim),
393 ("embed_model", embed_model),
394 ("embed_version", "1"),
395 ("vector_revision", "0"),
396 ("last_agg_ts", "1970-01-01T00:00:00.000Z"),
397 ("recall.w_content", "0.55"),
398 ("recall.w_trigger", "0.25"),
399 ("recall.w_confidence", "0.10"),
400 ("recall.w_context", "0.15"),
401 ("recall.w_activation", "0.08"),
402 ("recall.w_lexical", "0.25"),
403 ("recall.embed_situation_signature", "false"),
404 ("recall.top_k_candidates", "20"),
405 ("recall.anti_trigger_penalty", "0.6"),
406 ("recall.density_refill", "true"),
407 ("curate.low_conf_threshold", "0.25"),
408 ("curate.low_conf_idle_days", "60"),
409 ("curate.repeat_select_min", "10"),
410 ("curate.repeat_select_conf_max", "0.5"),
411 ("curate.never_used_age_days", "30"),
412 ("curate.open_ttl_days", "14"),
413 ("curate.screening_timeout_minutes", "30"),
414 ("curate.promote_used_success_min", "3"),
415 ("curate.promote_confidence_min", "0.60"),
416 ("curate.decay_floor", "0.20"),
417 ("evolve.threshold_new_count", "5"),
418 ("evolve.distill_batch_size", "20"),
419 ("evolve.schedule_interval_hours", "6"),
420 ("curate.soft_mature_threshold", "5"),
421 ("evolve.distill_token_window_hours", "24"),
422 ("curate.governance_archive_threshold", "3"),
423 ("curate.negative_feedback_archive_threshold", "5"),
424 ("evolve.governance_pending_threshold", "3"),
425 ("curate.governance_proposal_max_age_days", "30"),
426 ("curate.failure_min_uses", "5"),
427 ("curate.failure_max_success_rate", "0.20"),
428 ("curate.failure_confidence_max", "0.35"),
429 ("curate.log_compact_days", "30"),
430 ("appraise.tier_weak", "0.30"),
431 ("appraise.tier_strong", "0.65"),
432 ("appraise.min_strength", "0.40"),
433 ("appraise.top", "8"),
434 ("appraise.trigger_hit_min", "0.50"),
435 ("appraise.candidate_in_embed", "true"),
436 ("appraise.signature_floor", "0.0"),
437 ("appraise.min_evidence", "0"),
438 ("appraise.conflict_ceiling", "1.0"),
439 ("intuition.prior_m", "2.0"),
440 ("intuition.base_rate", "0.5"),
441 ("intuition.calibration_bins", "10"),
442 ("situation.coarse_keys", "stage,error_class,file_type"),
443 ];
444 self.storage.begin_immediate()?;
445 let result = (|| -> Result<()> {
446 for (k, v) in defaults {
447 if self.storage.get_meta(k)?.is_none() {
448 self.storage.set_meta(k, v)?;
449 }
450 }
451 self.storage.commit()
452 })();
453 if result.is_err() {
454 let _ = self.storage.rollback();
455 }
456 result
457 }
458
459 fn load_params(&mut self) -> Result<()> {
460 let f = |k: &str, d: f64| -> f64 {
461 self.storage
462 .get_meta(k)
463 .ok()
464 .flatten()
465 .and_then(|v| v.parse().ok())
466 .unwrap_or(d)
467 };
468 let i = |k: &str, d: i64| -> i64 {
469 self.storage
470 .get_meta(k)
471 .ok()
472 .flatten()
473 .and_then(|v| v.parse().ok())
474 .unwrap_or(d)
475 };
476 let b = |k: &str, d: bool| -> bool {
477 self.storage
478 .get_meta(k)
479 .ok()
480 .flatten()
481 .map(|v| v.to_lowercase() == "true")
482 .unwrap_or(d)
483 };
484 self.w_content = f("recall.w_content", W_CONTENT);
485 self.w_trigger = f("recall.w_trigger", W_TRIGGER);
486 self.w_confidence = f("recall.w_confidence", W_CONFIDENCE);
487 self.w_context = f("recall.w_context", W_CONTEXT);
488 self.w_lexical = f("recall.w_lexical", W_LEXICAL);
489 self.embed_situation_signature =
490 b("recall.embed_situation_signature", EMBED_SITUATION_SIGNATURE);
491 self.w_activation = f("recall.w_activation", W_ACTIVATION);
492 self.top_k_candidates =
493 i("recall.top_k_candidates", TOP_K_CANDIDATES as i64).max(1) as usize;
494 self.anti_trigger_penalty = f("recall.anti_trigger_penalty", ANTI_TRIGGER_PENALTY);
495 self.density_refill = b("recall.density_refill", DENSITY_REFILL);
496 self.low_conf_threshold = f("curate.low_conf_threshold", LOW_CONF_THRESHOLD);
497 self.low_conf_idle_days = i("curate.low_conf_idle_days", LOW_CONF_IDLE_DAYS);
498 self.repeat_select_min = i("curate.repeat_select_min", REPEAT_SELECT_MIN);
499 self.repeat_select_conf_max = f("curate.repeat_select_conf_max", REPEAT_SELECT_CONF_MAX);
500 self.never_used_age_days = i("curate.never_used_age_days", NEVER_USED_AGE_DAYS);
501 self.open_ttl_days = i("curate.open_ttl_days", OPEN_TTL_DAYS);
502 self.screening_timeout_minutes = i(
503 "curate.screening_timeout_minutes",
504 SCREENING_TIMEOUT_MINUTES,
505 );
506 self.promote_used_success_min =
507 i("curate.promote_used_success_min", PROMOTE_USED_SUCCESS_MIN);
508 self.promote_confidence_min = f("curate.promote_confidence_min", PROMOTE_CONFIDENCE_MIN);
509 self.decay_floor = f("curate.decay_floor", DECAY_FLOOR).clamp(0.0, 0.4);
510 self.evolve_threshold = i("evolve.threshold_new_count", EVOLVE_THRESHOLD);
511 self.distill_batch_size =
512 i("evolve.distill_batch_size", DISTILL_BATCH_SIZE as i64) as usize;
513 self.evolve_schedule_interval_hours = i("evolve.schedule_interval_hours", 6).max(1);
514 self.governance_archive_threshold = i(
515 "curate.governance_archive_threshold",
516 GOVERNANCE_ARCHIVE_THRESHOLD,
517 )
518 .max(1);
519 self.negative_feedback_archive_threshold = i(
520 "curate.negative_feedback_archive_threshold",
521 NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD,
522 )
523 .max(1);
524 self.governance_evolve_threshold = i(
525 "evolve.governance_pending_threshold",
526 GOVERNANCE_EVOLVE_THRESHOLD,
527 )
528 .max(1);
529 self.governance_proposal_max_age_days =
530 i("curate.governance_proposal_max_age_days", 30).max(1);
531 self.failure_min_uses = i("curate.failure_min_uses", FAILURE_MIN_USES).max(1);
532 self.failure_max_success_rate =
533 f("curate.failure_max_success_rate", FAILURE_MAX_SUCCESS_RATE).clamp(0.0, 1.0);
534 self.failure_confidence_max =
535 f("curate.failure_confidence_max", FAILURE_CONFIDENCE_MAX).clamp(0.0, 1.0);
536 self.log_compact_days = i("curate.log_compact_days", LOG_COMPACT_DAYS).max(1);
537 let s = |k: &str, d: &str| -> String {
538 self.storage
539 .get_meta(k)
540 .ok()
541 .flatten()
542 .filter(|v| !v.trim().is_empty())
543 .unwrap_or_else(|| d.to_string())
544 };
545 self.appraise_tier_weak = f("appraise.tier_weak", APPRAISE_TIER_WEAK).clamp(0.0, 1.0);
546 self.appraise_tier_strong = f("appraise.tier_strong", APPRAISE_TIER_STRONG).clamp(0.0, 1.0);
547 self.appraise_min_strength =
548 f("appraise.min_strength", APPRAISE_MIN_STRENGTH).clamp(0.0, 1.0);
549 self.appraise_top = i("appraise.top", APPRAISE_TOP as i64).max(1) as usize;
550 self.appraise_trigger_hit_min =
551 f("appraise.trigger_hit_min", APPRAISE_TRIGGER_HIT_MIN).clamp(0.0, 1.0);
552 self.appraise_candidate_in_embed =
553 b("appraise.candidate_in_embed", APPRAISE_CANDIDATE_IN_EMBED);
554 self.appraise_signature_floor =
555 f("appraise.signature_floor", APPRAISE_SIGNATURE_FLOOR).clamp(0.0, 1.0);
556 self.appraise_min_evidence = i("appraise.min_evidence", APPRAISE_MIN_EVIDENCE).max(0);
557 self.appraise_conflict_ceiling =
558 f("appraise.conflict_ceiling", APPRAISE_CONFLICT_CEILING).clamp(0.0, 1.0);
559 self.intuition_prior_m = f("intuition.prior_m", INTUITION_PRIOR_M).max(0.0);
560 self.intuition_base_rate = f("intuition.base_rate", INTUITION_BASE_RATE).clamp(0.0, 1.0);
561 self.calibration_bins = i("intuition.calibration_bins", CALIBRATION_BINS).clamp(2, 100);
562 self.situation_coarse_keys = s("situation.coarse_keys", SITUATION_COARSE_KEYS);
563 Ok(())
564 }
565}
566
567struct CandidateInfo {
572 chunk: Value,
573 sim_content: f32,
574 sim_trigger: f32,
575 sim_lexical: f32,
578}
579
/// Reports whether a `key=value|key=value|…` signature carries any
/// informative value.
///
/// A part counts only if it contains `=` and the text after the first `=` is
/// neither empty nor exactly `none` / `unknown` (case-sensitive).
fn signature_has_signal(sig: &str) -> bool {
    for part in sig.split('|') {
        let Some((_key, value)) = part.split_once('=') else {
            // No `=` at all: the part carries no value.
            continue;
        };
        if !matches!(value, "" | "none" | "unknown") {
            return true;
        }
    }
    false
}
590
591fn new_candidate(chunk: &Value) -> CandidateInfo {
594 CandidateInfo {
595 chunk: chunk.clone(),
596 sim_content: 0.0,
597 sim_trigger: 0.0,
598 sim_lexical: 0.0,
599 }
600}
601
602fn chunk_is_valid_for_recall(chunk: &Value, embed_version: i64) -> bool {
603 chunk.get("state").and_then(Value::as_str) != Some("archived")
604 && chunk.get("origin").and_then(Value::as_str) != Some("spark")
605 && chunk
606 .get("embed_version")
607 .and_then(Value::as_i64)
608 .unwrap_or(1)
609 >= embed_version
610}
611
/// Canonicalises a query into a sorted, de-duplicated bag of words.
///
/// The query is lower-cased, every non-alphanumeric character acts as a
/// separator, a small set of English stop words is dropped, and the remaining
/// distinct tokens are joined with single spaces in ascending order. Word
/// order and repetition in the input therefore do not affect the result.
fn normalize_query(query: &str) -> String {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "for", "in", "of", "on", "the", "to", "with",
    ];

    let lowered = query.to_lowercase();
    // An ordered set gives both the sorting and the de-duplication.
    let unique: std::collections::BTreeSet<&str> = lowered
        .split(|ch: char| !ch.is_alphanumeric())
        .filter(|token| !token.is_empty() && !STOP_WORDS.contains(token))
        .collect();

    unique.into_iter().collect::<Vec<&str>>().join(" ")
}
643
644fn estimate_distill_prompt_tokens(log: &Value, related_logs: &[Value]) -> i64 {
645 let primary: i64 = [
646 "query",
647 "recall_snapshot",
648 "output",
649 "output_summary",
650 "nomination",
651 ]
652 .iter()
653 .filter_map(|key| log.get(*key).and_then(Value::as_str))
654 .map(|text| estimate_tokens(text) as i64)
655 .sum();
656 let log_id = log.get("id").and_then(Value::as_str).unwrap_or("");
657 let context_key = log.get("context_key").and_then(Value::as_str);
658 let related: i64 = related_logs
659 .iter()
660 .filter(|other| other.get("id").and_then(Value::as_str).unwrap_or("") != log_id)
661 .filter(|other| {
662 context_key.is_some() && other.get("context_key").and_then(Value::as_str) == context_key
663 })
664 .take(4)
665 .flat_map(|other| {
666 ["query", "output_summary", "outcome"]
667 .into_iter()
668 .filter_map(|key| other.get(key).and_then(Value::as_str))
669 })
670 .map(|text| estimate_tokens(text) as i64)
671 .sum();
672 primary + related
673}
674
675fn estimate_distilled_chunk_tokens(chunk: &DistilledChunk) -> i64 {
676 estimate_tokens(&chunk.content) as i64
677 + chunk
678 .trigger_desc
679 .as_deref()
680 .map(estimate_tokens)
681 .unwrap_or(0) as i64
682 + chunk
683 .anti_trigger_desc
684 .as_deref()
685 .map(estimate_tokens)
686 .unwrap_or(0) as i64
687}
688
/// Checks whether `query` contains any phrase of a comma-separated
/// anti-trigger list.
///
/// Matching is a case-insensitive substring test; each phrase is trimmed and
/// blank phrases are ignored.
fn anti_trigger_hit(query: &str, anti: &str) -> bool {
    let haystack = query.to_lowercase();
    let phrases = anti.to_lowercase();
    for phrase in phrases.split(',').map(str::trim) {
        if phrase.is_empty() {
            continue;
        }
        if haystack.contains(phrase) {
            return true;
        }
    }
    false
}
696
697fn block_cost(block: &[Value]) -> usize {
698 block
699 .iter()
700 .map(|b| {
701 b.get("token_count")
702 .and_then(Value::as_u64)
703 .map(|t| t as usize)
704 .unwrap_or_else(|| {
705 estimate_tokens(b.get("content").and_then(Value::as_str).unwrap_or("")).max(100)
706 })
707 })
708 .sum()
709}
710
711fn limit_knowledge(knowledge: Vec<Value>, top: Option<usize>) -> Vec<Value> {
712 match top {
713 None => knowledge,
714 Some(0) => vec![],
715 Some(n) => knowledge.into_iter().take(n).collect(),
716 }
717}
718
/// Classifies what is known about which items were used.
///
/// * `None` -> `"unknown"` (no usage information supplied)
/// * `Some(&[])` -> `"known_none"`
/// * `Some(non-empty)` -> `"known_some"`
fn usage_state(used: Option<&[String]>) -> &'static str {
    let Some(ids) = used else {
        return "unknown";
    };
    if ids.is_empty() {
        "known_none"
    } else {
        "known_some"
    }
}
726
/// Returns `numerator / denominator` rounded to three decimal places.
///
/// A zero or negative denominator yields `0.0` rather than an error.
fn ratio(numerator: i64, denominator: i64) -> f64 {
    if denominator > 0 {
        let quotient = numerator as f64 / denominator as f64;
        (quotient * 1000.0).round() / 1000.0
    } else {
        0.0
    }
}
734
735fn validate_source(source: &str) -> Result<()> {
736 if !matches!(
737 source,
738 "mcp" | "sdk" | "cli" | "hook" | "daemon" | "augmented"
739 ) {
740 return Err(InnateError::InvalidState(format!(
741 "invalid event source: {source}"
742 )));
743 }
744 Ok(())
745}
746
747fn count_query(storage: &Storage, sql: &str) -> Result<i64> {
748 Ok(storage
749 .query_chunks(sql)?
750 .first()
751 .and_then(|r| r.as_object())
752 .and_then(|m| m.values().next())
753 .and_then(Value::as_i64)
754 .unwrap_or(0))
755}
756
757fn count_query_params<P: rusqlite::Params>(storage: &Storage, sql: &str, p: P) -> Result<i64> {
758 Ok(storage
759 .query_chunks_params(sql, p)?
760 .first()
761 .and_then(|r| r.as_object())
762 .and_then(|m| m.values().next())
763 .and_then(Value::as_i64)
764 .unwrap_or(0))
765}
766
767fn days_ago(now_iso: &str, days: i64) -> String {
768 use chrono::{DateTime, Duration, Utc};
769 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
770 let cutoff = t - Duration::days(days);
771 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
772 }
773 now_iso.to_string()
774}
775
776fn minutes_ago(now_iso: &str, minutes: i64) -> String {
777 use chrono::{DateTime, Duration, Utc};
778 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
779 let cutoff = t - Duration::minutes(minutes);
780 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
781 }
782 now_iso.to_string()
783}
784
785fn hours_ago(now_iso: &str, hours: i64) -> String {
786 use chrono::{DateTime, Duration, Utc};
787 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
788 let cutoff = t - Duration::hours(hours);
789 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
790 }
791 now_iso.to_string()
792}
793
794fn minutes_after(now_iso: &str, minutes: i64) -> String {
795 use chrono::{DateTime, Duration, Utc};
796 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
797 let cutoff = t + Duration::minutes(minutes);
798 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
799 }
800 now_iso.to_string()
801}
802
803fn hours_after(now_iso: &str, hours: i64) -> String {
804 use chrono::{DateTime, Duration, Utc};
805 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
806 let cutoff = t + Duration::hours(hours);
807 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
808 }
809 now_iso.to_string()
810}
811
812fn iso_days_diff(now_iso: &str, past_iso: &str) -> i64 {
814 use chrono::{DateTime, Utc};
815 let parse = |s: &str| s.parse::<DateTime<Utc>>().ok();
816 if let (Some(a), Some(b)) = (parse(now_iso), parse(past_iso)) {
817 let diff = a - b;
818 diff.num_days().max(0)
819 } else {
820 0
821 }
822}
823
824fn iso_fractional_days(now_iso: &str, past_iso: &str) -> f64 {
827 use chrono::{DateTime, Utc};
828 let parse = |s: &str| s.parse::<DateTime<Utc>>().ok();
829 if let (Some(a), Some(b)) = (parse(now_iso), parse(past_iso)) {
830 ((a - b).num_seconds().max(0)) as f64 / 86_400.0
831 } else {
832 0.0
833 }
834}
835
836const ACTR_DECAY: f64 = 0.5;
838
839pub(super) fn actr_activation(used_count: i64, last_used_at: Option<&str>, now_iso: &str) -> f64 {
852 if used_count <= 0 {
853 return 0.0;
854 }
855 let Some(last) = last_used_at else {
856 return 0.0;
857 };
858 let recency_days = iso_fractional_days(now_iso, last);
859 let b = (1.0 + used_count as f64).ln() - ACTR_DECAY * (1.0 + recency_days).ln();
860 1.0 / (1.0 + (-b).exp())
861}
862
863fn detect_cycles(deps: &[Value]) -> Vec<Vec<String>> {
865 use std::collections::HashMap;
866 let mut adj: HashMap<String, Vec<String>> = HashMap::new();
867 for d in deps {
868 let src = d
869 .get("src")
870 .and_then(Value::as_str)
871 .unwrap_or("")
872 .to_string();
873 let dst = d
874 .get("dst")
875 .and_then(Value::as_str)
876 .unwrap_or("")
877 .to_string();
878 if !src.is_empty() && !dst.is_empty() {
879 adj.entry(src).or_default().push(dst);
880 }
881 }
882 let nodes: Vec<String> = adj.keys().cloned().collect();
883 let mut visited: HashSet<String> = HashSet::new();
884 let mut on_stack: HashSet<String> = HashSet::new();
885 let mut cycles: Vec<Vec<String>> = vec![];
886
887 fn dfs(
888 node: &str,
889 adj: &HashMap<String, Vec<String>>,
890 visited: &mut HashSet<String>,
891 on_stack: &mut HashSet<String>,
892 path: &mut Vec<String>,
893 cycles: &mut Vec<Vec<String>>,
894 ) {
895 if on_stack.contains(node) {
896 let start = path.iter().position(|n| n == node).unwrap_or(0);
898 cycles.push(path[start..].to_vec());
899 return;
900 }
901 if visited.contains(node) {
902 return;
903 }
904 visited.insert(node.to_string());
905 on_stack.insert(node.to_string());
906 path.push(node.to_string());
907 if let Some(children) = adj.get(node) {
908 for child in children {
909 dfs(child, adj, visited, on_stack, path, cycles);
910 }
911 }
912 path.pop();
913 on_stack.remove(node);
914 }
915
916 for node in nodes {
917 let mut path = vec![];
918 dfs(
919 &node,
920 &adj,
921 &mut visited,
922 &mut on_stack,
923 &mut path,
924 &mut cycles,
925 );
926 }
927 cycles
928}