/// Three-part tuple alias: a list of JSON values, a list of
/// `(JSON values, f64, usize)` groups, and a string-to-string map.
///
/// NOTE(review): nothing in the visible part of this file uses the alias; it is
/// presumably the return shape of the recall packing step (selected knowledge,
/// scored blocks, skip reasons) — confirm against the `recall` submodule.
type PackResult = (
    Vec<Value>,
    Vec<(Vec<Value>, f64, usize)>,
    std::collections::HashMap<String, String>,
);
9
10use std::collections::{HashMap, HashSet};
11use std::path::Path;
12use std::sync::Arc;
13
14use serde_json::{json, Value};
15
16use crate::embedding::{DummyEmbeddingProvider, EmbeddingProvider};
17use crate::errors::{InnateError, Result};
18use crate::refine::{
19 DefaultSanitizer, DistilledChunk, Distiller, HeuristicDistiller, NullRefiner, Refiner,
20 Sanitizer,
21};
22use crate::storage::{ChunkRow, EpisodicLogRow, Storage};
23use crate::utils::{
24 content_hash, estimate_tokens, gen_uuid, pack_embedding, utc_now_iso, SanitizeAction,
25};
26
27mod appraise;
28mod curate;
29mod evolve;
30mod inspection;
31mod lifecycle;
32mod recall;
33mod record;
34mod repair;
35mod situation;
36
37pub use appraise::{
38 AbstainReason, AppraiseParams, Contributor, FlaggedPoint, Tier, Valence, Verdict,
39 APPRAISE_ADVISORY,
40};
41pub use recall::RecallParams;
42pub use record::RecordParams;
43pub use repair::TraceRepairReport;
44pub use situation::Situation;
45
// Compiled-in defaults for the recall parameters. `KnowledgeBase::open_with` copies
// them into the matching fields, and `load_params` then replaces each one with the
// value stored under the named metadata key when that value parses.

/// Default for `recall.w_content`.
const W_CONTENT: f64 = 0.55;
/// Default for `recall.w_trigger`.
const W_TRIGGER: f64 = 0.25;
/// Default for `recall.w_confidence`.
const W_CONFIDENCE: f64 = 0.10;
/// Default for `recall.w_context`.
const W_CONTEXT: f64 = 0.15;
/// Default for `recall.w_activation`.
const W_ACTIVATION: f64 = 0.08;
/// Default for `recall.top_k_candidates` (`load_params` enforces a minimum of 1).
const TOP_K_CANDIDATES: usize = 20;
/// Default for `recall.anti_trigger_penalty`.
const ANTI_TRIGGER_PENALTY: f64 = 0.6;
/// Default for `recall.density_refill`.
const DENSITY_REFILL: bool = true;
62
// Compiled-in defaults for the curate / evolve parameters. Each is copied into the
// matching `KnowledgeBase` field by `open_with` and may be overridden by `load_params`
// from the named metadata key.

/// Default for `curate.low_conf_threshold`.
const LOW_CONF_THRESHOLD: f64 = 0.25;
/// Default for `curate.low_conf_idle_days`.
const LOW_CONF_IDLE_DAYS: i64 = 60;
/// Default for `curate.repeat_select_min`.
const REPEAT_SELECT_MIN: i64 = 10;
/// Default for `curate.repeat_select_conf_max`.
const REPEAT_SELECT_CONF_MAX: f64 = 0.5;
/// Default for `curate.never_used_age_days`.
const NEVER_USED_AGE_DAYS: i64 = 30;
/// Default for `curate.open_ttl_days`.
const OPEN_TTL_DAYS: i64 = 14;
/// Default for `curate.screening_timeout_minutes`.
const SCREENING_TIMEOUT_MINUTES: i64 = 30;
/// Default for `curate.promote_used_success_min`.
const PROMOTE_USED_SUCCESS_MIN: i64 = 3;
/// Default for `curate.promote_confidence_min`.
const PROMOTE_CONFIDENCE_MIN: f64 = 0.60;
/// Default for `curate.decay_floor` (`load_params` clamps the loaded value to `[0.0, 0.4]`).
const DECAY_FLOOR: f64 = 0.20;
/// Default for `evolve.threshold_new_count`.
const EVOLVE_THRESHOLD: i64 = 5;
/// Default for `evolve.distill_batch_size`.
const DISTILL_BATCH_SIZE: usize = 20;
/// NOTE(review): not referenced in the visible part of this file and has no metadata
/// key in `init_meta`; presumably consumed directly by a submodule — confirm.
const PENDING_RECALL_PENALTY: f64 = 0.60;
76
// Compiled-in defaults for the appraise / intuition / situation / governance
// parameters. Each is copied into the matching `KnowledgeBase` field by `open_with`
// and may be overridden by `load_params` from the named metadata key.

/// Default for `appraise.tier_weak` (loaded value is clamped to `[0.0, 1.0]`).
const APPRAISE_TIER_WEAK: f64 = 0.30;
/// Default for `appraise.tier_strong` (loaded value is clamped to `[0.0, 1.0]`).
const APPRAISE_TIER_STRONG: f64 = 0.65;
/// Default for `appraise.min_strength` (loaded value is clamped to `[0.0, 1.0]`).
const APPRAISE_MIN_STRENGTH: f64 = 0.40;
/// Default for `appraise.top` (`load_params` enforces a minimum of 1).
const APPRAISE_TOP: usize = 8;
/// Default for `appraise.trigger_hit_min` (loaded value is clamped to `[0.0, 1.0]`).
const APPRAISE_TRIGGER_HIT_MIN: f64 = 0.50;
/// Default for `appraise.candidate_in_embed`.
const APPRAISE_CANDIDATE_IN_EMBED: bool = true;
/// Default for `appraise.signature_floor` (loaded value is clamped to `[0.0, 1.0]`).
const APPRAISE_SIGNATURE_FLOOR: f64 = 0.0;
/// Default for `appraise.min_evidence` (`load_params` enforces a minimum of 0).
const APPRAISE_MIN_EVIDENCE: i64 = 0;
/// Default for `appraise.conflict_ceiling` (loaded value is clamped to `[0.0, 1.0]`).
const APPRAISE_CONFLICT_CEILING: f64 = 1.0;
/// Default for `intuition.prior_m` (`load_params` enforces a minimum of 0.0).
const INTUITION_PRIOR_M: f64 = 2.0;
/// Default for `intuition.base_rate` (loaded value is clamped to `[0.0, 1.0]`).
const INTUITION_BASE_RATE: f64 = 0.5;
/// NOTE(review): not referenced in the visible part of this file and has no metadata
/// key in `init_meta`; presumably the recall-side counterpart of `INTUITION_PRIOR_M`
/// used by a submodule — confirm.
const RECALL_PRIOR_M: f64 = 2.0;
/// NOTE(review): not referenced in the visible part of this file; presumably the
/// recall-side counterpart of `INTUITION_BASE_RATE` — confirm.
const RECALL_BASE_RATE: f64 = 0.5;
/// Default for `intuition.calibration_bins` (loaded value is clamped to `[2, 100]`).
const CALIBRATION_BINS: i64 = 10;
/// Default for `situation.coarse_keys`: a comma-separated list of key names.
const SITUATION_COARSE_KEYS: &str = "stage,error_class,file_type";
/// Default for `curate.governance_archive_threshold` (minimum 1 after loading).
const GOVERNANCE_ARCHIVE_THRESHOLD: i64 = 3;
/// Default for `curate.negative_feedback_archive_threshold` (minimum 1 after loading).
const NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD: i64 = 5;
/// Default for `evolve.governance_pending_threshold` (minimum 1 after loading).
const GOVERNANCE_EVOLVE_THRESHOLD: i64 = 3;
/// Default for `curate.failure_min_uses` (minimum 1 after loading).
const FAILURE_MIN_USES: i64 = 5;
/// Default for `curate.failure_max_success_rate` (loaded value is clamped to `[0.0, 1.0]`).
const FAILURE_MAX_SUCCESS_RATE: f64 = 0.20;
/// Default for `curate.failure_confidence_max` (loaded value is clamped to `[0.0, 1.0]`).
const FAILURE_CONFIDENCE_MAX: f64 = 0.35;
/// Default for `curate.log_compact_days` (minimum 1 after loading).
const LOG_COMPACT_DAYS: i64 = 30;
112
/// Outcome of a recall call. All fields are plain data; `Default` yields empty
/// collections, an empty `trace_id` and `empty == false`.
///
/// NOTE(review): the field meanings below are inferred from their names; the code
/// that fills this struct lives outside the visible part of the file — confirm.
#[derive(Debug, Default, Clone)]
pub struct RecallResult {
    /// Returned knowledge entries as JSON values.
    pub knowledge: Vec<Value>,
    /// Returned spark entries as JSON values.
    pub sparks: Vec<Value>,
    /// Identifier of the trace associated with this recall.
    pub trace_id: String,
    /// Presumably set when the recall produced nothing — confirm.
    pub empty: bool,
    /// Presumably ids skipped for depth reasons — confirm.
    pub depth_skipped: Vec<String>,
    /// String-to-string map of skip reasons (key semantics not visible here).
    pub skipped_reasons: HashMap<String, String>,
}
126
/// Report returned by [`Curator::run`]. `Default` yields an empty report.
///
/// NOTE(review): the per-field meanings are inferred from the field names; the code
/// that populates the report is outside the visible part of the file — confirm.
#[derive(Debug, Default)]
pub struct CurateReport {
    /// Ids reported as archived.
    pub archived: Vec<String>,
    /// Ids reported as deduplicated.
    pub deduped: Vec<String>,
    /// Ids reported as decayed.
    pub decayed: Vec<String>,
    /// Cycles, each given as a list of ids (same shape as `detect_cycles` returns).
    pub cycles: Vec<Vec<String>>,
    /// Ids reported as orphans.
    pub orphans: Vec<String>,
    /// Ids reported as recovered.
    pub recovered: Vec<String>,
    /// Free-form warning messages.
    pub warnings: Vec<String>,
    /// Named statistics as JSON values.
    pub stats: HashMap<String, Value>,
}
138
/// Private pair of counters for one distillation batch.
///
/// NOTE(review): not used in the visible part of the file; presumably filled in by
/// the `evolve` submodule — confirm.
#[derive(Debug, Default)]
struct DistillBatchReport {
    /// Number of items counted as distilled.
    distilled: usize,
    /// Number of items counted as failed.
    failed: usize,
}
144
/// Scope passed to [`Curator::run`]. `Default` leaves both filters unset and
/// `dry_run` false.
///
/// NOTE(review): how each field restricts a curate pass is decided by the curator
/// implementation, which is outside the visible part of the file — confirm.
#[derive(Debug, Default, Clone)]
pub struct CurateScope {
    /// Optional origin filter.
    pub origin: Option<String>,
    /// Optional skill-name filter.
    pub skill_name: Option<String>,
    /// Presumably requests a report without applying changes — confirm.
    pub dry_run: bool,
}
155
/// Pluggable curation strategy. `KnowledgeBase` stores one as `Arc<dyn Curator>`,
/// hence the `Send + Sync` bound.
pub trait Curator: Send + Sync {
    /// Runs one curation pass over `kb` restricted by `scope` and returns the report,
    /// or the error that stopped the pass.
    fn run(&self, kb: &KnowledgeBase, scope: &CurateScope) -> Result<CurateReport>;
}
161
/// Default [`Curator`]: `KnowledgeBase::open_with` installs it when no curator is
/// supplied.
pub struct BuiltinCurator;

impl Curator for BuiltinCurator {
    /// Delegates directly to `KnowledgeBase::builtin_curate_impl` with the same scope.
    fn run(&self, kb: &KnowledgeBase, scope: &CurateScope) -> Result<CurateReport> {
        kb.builtin_curate_impl(scope)
    }
}
170
/// Handle to a knowledge base: the backing [`Storage`], the pluggable processing
/// components, and the tunable parameters.
///
/// Every parameter field is initialised from a compiled-in default in `open_with`
/// and then overwritten by `load_params` from storage metadata. Unless noted, the
/// metadata key is the field name with its group prefix turned into a dotted
/// namespace (for example `w_content` is loaded from `recall.w_content`).
pub struct KnowledgeBase {
    /// Backing store, opened in `open_with` via `Storage::open`.
    pub storage: Storage,
    /// Embedding provider; its `content_dim()` / `trigger_dim()` define the vector
    /// lengths accepted by `store_vec_content` / `store_vec_trigger`.
    embedding: Arc<dyn EmbeddingProvider>,
    /// Refiner component (defaults to `NullRefiner`).
    refiner: Arc<dyn Refiner>,
    /// Distiller component (defaults to `HeuristicDistiller`).
    distiller: Arc<dyn Distiller>,
    /// Curator component (defaults to `BuiltinCurator`).
    curator: Arc<dyn Curator>,
    /// Sanitizer component (defaults to `DefaultSanitizer`).
    sanitizer: Arc<dyn Sanitizer>,

    // --- Recall parameters, loaded from the `recall.*` keys. ---
    w_content: f64,
    w_trigger: f64,
    w_confidence: f64,
    w_context: f64,
    w_activation: f64,
    /// At least 1 after loading.
    top_k_candidates: usize,
    anti_trigger_penalty: f64,
    density_refill: bool,

    // --- Curate / evolve parameters, loaded from the `curate.*` and `evolve.*` keys. ---
    low_conf_threshold: f64,
    low_conf_idle_days: i64,
    repeat_select_min: i64,
    repeat_select_conf_max: f64,
    never_used_age_days: i64,
    open_ttl_days: i64,
    screening_timeout_minutes: i64,
    promote_used_success_min: i64,
    promote_confidence_min: f64,
    /// Clamped to `[0.0, 0.4]` after loading.
    decay_floor: f64,
    /// Loaded from `evolve.threshold_new_count`.
    evolve_threshold: i64,
    /// Loaded from `evolve.distill_batch_size`.
    distill_batch_size: usize,
    /// Loaded from `evolve.schedule_interval_hours`; at least 1.
    evolve_schedule_interval_hours: i64,
    /// At least 1 after loading.
    governance_archive_threshold: i64,
    /// At least 1 after loading.
    negative_feedback_archive_threshold: i64,
    /// Loaded from `evolve.governance_pending_threshold`; at least 1.
    governance_evolve_threshold: i64,
    /// At least 1 after loading.
    governance_proposal_max_age_days: i64,
    /// At least 1 after loading.
    failure_min_uses: i64,
    /// Clamped to `[0.0, 1.0]` after loading.
    failure_max_success_rate: f64,
    /// Clamped to `[0.0, 1.0]` after loading.
    failure_confidence_max: f64,
    /// At least 1 after loading.
    log_compact_days: i64,

    // --- Appraise / intuition / situation parameters, loaded from the `appraise.*`,
    // `intuition.*` and `situation.*` keys. The `f64` appraise values and
    // `intuition_base_rate` are clamped to `[0.0, 1.0]` after loading. ---
    appraise_tier_weak: f64,
    appraise_tier_strong: f64,
    appraise_min_strength: f64,
    /// At least 1 after loading.
    appraise_top: usize,
    appraise_trigger_hit_min: f64,
    appraise_candidate_in_embed: bool,
    appraise_signature_floor: f64,
    /// At least 0 after loading.
    appraise_min_evidence: i64,
    appraise_conflict_ceiling: f64,
    /// At least 0.0 after loading.
    intuition_prior_m: f64,
    intuition_base_rate: f64,
    /// Loaded from `intuition.calibration_bins`; clamped to `[2, 100]`.
    calibration_bins: i64,
    /// Comma-separated key list loaded from `situation.coarse_keys`; a blank stored
    /// value falls back to the default.
    situation_coarse_keys: String,
}
230
231impl KnowledgeBase {
232 pub fn open(db_path: impl AsRef<Path>) -> Result<Self> {
233 Self::open_with(db_path, None, None, None, None, None)
234 }
235
236 pub(crate) fn store_vec_content(&self, chunk_id: &str, cvec: &[f32]) -> Result<()> {
242 let want = self.embedding.content_dim();
243 if cvec.len() != want {
244 return Err(InnateError::InvalidState(format!(
245 "content embedding dim {} != configured {want} (chunk {chunk_id})",
246 cvec.len()
247 )));
248 }
249 self.storage
250 .insert_vec_content(chunk_id, &pack_embedding(cvec))
251 }
252
253 pub(crate) fn store_vec_trigger(&self, chunk_id: &str, tvec: &[f32]) -> Result<()> {
256 let want = self.embedding.trigger_dim();
257 if tvec.len() != want {
258 return Err(InnateError::InvalidState(format!(
259 "trigger embedding dim {} != configured {want} (chunk {chunk_id})",
260 tvec.len()
261 )));
262 }
263 self.storage
264 .insert_vec_trigger(chunk_id, &pack_embedding(tvec))
265 }
266
267 pub fn open_with(
268 db_path: impl AsRef<Path>,
269 embedding: Option<Arc<dyn EmbeddingProvider>>,
270 refiner: Option<Arc<dyn Refiner>>,
271 distiller: Option<Arc<dyn Distiller>>,
272 curator: Option<Arc<dyn Curator>>,
273 sanitizer: Option<Arc<dyn Sanitizer>>,
274 ) -> Result<Self> {
275 let embedding = embedding.unwrap_or_else(|| Arc::new(DummyEmbeddingProvider::default()));
276 let refiner = refiner.unwrap_or_else(|| Arc::new(NullRefiner));
277 let distiller = distiller.unwrap_or_else(|| Arc::new(HeuristicDistiller));
278 let curator = curator.unwrap_or_else(|| Arc::new(BuiltinCurator));
279 let sanitizer = sanitizer.unwrap_or_else(|| Arc::new(DefaultSanitizer));
280
281 let storage = Storage::open(db_path, embedding.content_dim(), embedding.trigger_dim())?;
282
283 let mut kb = Self {
284 storage,
285 embedding,
286 refiner,
287 distiller,
288 curator,
289 sanitizer,
290 w_content: W_CONTENT,
291 w_trigger: W_TRIGGER,
292 w_confidence: W_CONFIDENCE,
293 w_context: W_CONTEXT,
294 w_activation: W_ACTIVATION,
295 top_k_candidates: TOP_K_CANDIDATES,
296 anti_trigger_penalty: ANTI_TRIGGER_PENALTY,
297 density_refill: DENSITY_REFILL,
298 low_conf_threshold: LOW_CONF_THRESHOLD,
299 low_conf_idle_days: LOW_CONF_IDLE_DAYS,
300 repeat_select_min: REPEAT_SELECT_MIN,
301 repeat_select_conf_max: REPEAT_SELECT_CONF_MAX,
302 never_used_age_days: NEVER_USED_AGE_DAYS,
303 open_ttl_days: OPEN_TTL_DAYS,
304 screening_timeout_minutes: SCREENING_TIMEOUT_MINUTES,
305 promote_used_success_min: PROMOTE_USED_SUCCESS_MIN,
306 promote_confidence_min: PROMOTE_CONFIDENCE_MIN,
307 decay_floor: DECAY_FLOOR,
308 evolve_threshold: EVOLVE_THRESHOLD,
309 distill_batch_size: DISTILL_BATCH_SIZE,
310 evolve_schedule_interval_hours: 6,
311 governance_archive_threshold: GOVERNANCE_ARCHIVE_THRESHOLD,
312 negative_feedback_archive_threshold: NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD,
313 governance_evolve_threshold: GOVERNANCE_EVOLVE_THRESHOLD,
314 governance_proposal_max_age_days: 30,
315 failure_min_uses: FAILURE_MIN_USES,
316 failure_max_success_rate: FAILURE_MAX_SUCCESS_RATE,
317 failure_confidence_max: FAILURE_CONFIDENCE_MAX,
318 log_compact_days: LOG_COMPACT_DAYS,
319 appraise_tier_weak: APPRAISE_TIER_WEAK,
320 appraise_tier_strong: APPRAISE_TIER_STRONG,
321 appraise_min_strength: APPRAISE_MIN_STRENGTH,
322 appraise_top: APPRAISE_TOP,
323 appraise_trigger_hit_min: APPRAISE_TRIGGER_HIT_MIN,
324 appraise_candidate_in_embed: APPRAISE_CANDIDATE_IN_EMBED,
325 appraise_signature_floor: APPRAISE_SIGNATURE_FLOOR,
326 appraise_min_evidence: APPRAISE_MIN_EVIDENCE,
327 appraise_conflict_ceiling: APPRAISE_CONFLICT_CEILING,
328 intuition_prior_m: INTUITION_PRIOR_M,
329 intuition_base_rate: INTUITION_BASE_RATE,
330 calibration_bins: CALIBRATION_BINS,
331 situation_coarse_keys: SITUATION_COARSE_KEYS.to_string(),
332 };
333 kb.init_meta()?;
334 kb.load_params()?;
335 Ok(kb)
336 }
337
338 fn init_meta(&self) -> Result<()> {
339 let lib_id = gen_uuid();
340 let content_dim = self.embedding.content_dim().to_string();
341 let trigger_dim = self.embedding.trigger_dim().to_string();
342 let embed_model = self.embedding.model_name();
343
344 for (key, expected) in [
345 ("content_dim", self.embedding.content_dim()),
346 ("trigger_dim", self.embedding.trigger_dim()),
347 ] {
348 if let Some(stored) = self.storage.get_meta(key)? {
349 let actual = stored.parse::<usize>().map_err(|_| {
350 InnateError::Other(format!("invalid {key} metadata value: {stored}"))
351 })?;
352 if actual != expected {
353 return Err(InnateError::Other(format!(
354 "{key} mismatch: database uses {actual}, embedding provider uses {expected}"
355 )));
356 }
357 }
358 }
359
360 let defaults: &[(&str, &str)] = &[
361 ("lib_id", &lib_id),
362 ("lib_role", "personal"),
363 ("schema_version", "4.14"),
364 ("content_dim", &content_dim),
365 ("trigger_dim", &trigger_dim),
366 ("embed_model", embed_model),
367 ("embed_version", "1"),
368 ("vector_revision", "0"),
369 ("last_agg_ts", "1970-01-01T00:00:00.000Z"),
370 ("recall.w_content", "0.55"),
371 ("recall.w_trigger", "0.25"),
372 ("recall.w_confidence", "0.10"),
373 ("recall.w_context", "0.15"),
374 ("recall.w_activation", "0.08"),
375 ("recall.top_k_candidates", "20"),
376 ("recall.anti_trigger_penalty", "0.6"),
377 ("recall.density_refill", "true"),
378 ("curate.low_conf_threshold", "0.25"),
379 ("curate.low_conf_idle_days", "60"),
380 ("curate.repeat_select_min", "10"),
381 ("curate.repeat_select_conf_max", "0.5"),
382 ("curate.never_used_age_days", "30"),
383 ("curate.open_ttl_days", "14"),
384 ("curate.screening_timeout_minutes", "30"),
385 ("curate.promote_used_success_min", "3"),
386 ("curate.promote_confidence_min", "0.60"),
387 ("curate.decay_floor", "0.20"),
388 ("evolve.threshold_new_count", "5"),
389 ("evolve.distill_batch_size", "20"),
390 ("evolve.schedule_interval_hours", "6"),
391 ("curate.soft_mature_threshold", "5"),
392 ("evolve.distill_token_window_hours", "24"),
393 ("curate.governance_archive_threshold", "3"),
394 ("curate.negative_feedback_archive_threshold", "5"),
395 ("evolve.governance_pending_threshold", "3"),
396 ("curate.governance_proposal_max_age_days", "30"),
397 ("curate.failure_min_uses", "5"),
398 ("curate.failure_max_success_rate", "0.20"),
399 ("curate.failure_confidence_max", "0.35"),
400 ("curate.log_compact_days", "30"),
401 ("appraise.tier_weak", "0.30"),
402 ("appraise.tier_strong", "0.65"),
403 ("appraise.min_strength", "0.40"),
404 ("appraise.top", "8"),
405 ("appraise.trigger_hit_min", "0.50"),
406 ("appraise.candidate_in_embed", "true"),
407 ("appraise.signature_floor", "0.0"),
408 ("appraise.min_evidence", "0"),
409 ("appraise.conflict_ceiling", "1.0"),
410 ("intuition.prior_m", "2.0"),
411 ("intuition.base_rate", "0.5"),
412 ("intuition.calibration_bins", "10"),
413 ("situation.coarse_keys", "stage,error_class,file_type"),
414 ];
415 self.storage.begin_immediate()?;
416 let result = (|| -> Result<()> {
417 for (k, v) in defaults {
418 if self.storage.get_meta(k)?.is_none() {
419 self.storage.set_meta(k, v)?;
420 }
421 }
422 self.storage.commit()
423 })();
424 if result.is_err() {
425 let _ = self.storage.rollback();
426 }
427 result
428 }
429
430 fn load_params(&mut self) -> Result<()> {
431 let f = |k: &str, d: f64| -> f64 {
432 self.storage
433 .get_meta(k)
434 .ok()
435 .flatten()
436 .and_then(|v| v.parse().ok())
437 .unwrap_or(d)
438 };
439 let i = |k: &str, d: i64| -> i64 {
440 self.storage
441 .get_meta(k)
442 .ok()
443 .flatten()
444 .and_then(|v| v.parse().ok())
445 .unwrap_or(d)
446 };
447 let b = |k: &str, d: bool| -> bool {
448 self.storage
449 .get_meta(k)
450 .ok()
451 .flatten()
452 .map(|v| v.to_lowercase() == "true")
453 .unwrap_or(d)
454 };
455 self.w_content = f("recall.w_content", W_CONTENT);
456 self.w_trigger = f("recall.w_trigger", W_TRIGGER);
457 self.w_confidence = f("recall.w_confidence", W_CONFIDENCE);
458 self.w_context = f("recall.w_context", W_CONTEXT);
459 self.w_activation = f("recall.w_activation", W_ACTIVATION);
460 self.top_k_candidates =
461 i("recall.top_k_candidates", TOP_K_CANDIDATES as i64).max(1) as usize;
462 self.anti_trigger_penalty = f("recall.anti_trigger_penalty", ANTI_TRIGGER_PENALTY);
463 self.density_refill = b("recall.density_refill", DENSITY_REFILL);
464 self.low_conf_threshold = f("curate.low_conf_threshold", LOW_CONF_THRESHOLD);
465 self.low_conf_idle_days = i("curate.low_conf_idle_days", LOW_CONF_IDLE_DAYS);
466 self.repeat_select_min = i("curate.repeat_select_min", REPEAT_SELECT_MIN);
467 self.repeat_select_conf_max = f("curate.repeat_select_conf_max", REPEAT_SELECT_CONF_MAX);
468 self.never_used_age_days = i("curate.never_used_age_days", NEVER_USED_AGE_DAYS);
469 self.open_ttl_days = i("curate.open_ttl_days", OPEN_TTL_DAYS);
470 self.screening_timeout_minutes = i(
471 "curate.screening_timeout_minutes",
472 SCREENING_TIMEOUT_MINUTES,
473 );
474 self.promote_used_success_min =
475 i("curate.promote_used_success_min", PROMOTE_USED_SUCCESS_MIN);
476 self.promote_confidence_min = f("curate.promote_confidence_min", PROMOTE_CONFIDENCE_MIN);
477 self.decay_floor = f("curate.decay_floor", DECAY_FLOOR).clamp(0.0, 0.4);
478 self.evolve_threshold = i("evolve.threshold_new_count", EVOLVE_THRESHOLD);
479 self.distill_batch_size =
480 i("evolve.distill_batch_size", DISTILL_BATCH_SIZE as i64) as usize;
481 self.evolve_schedule_interval_hours = i("evolve.schedule_interval_hours", 6).max(1);
482 self.governance_archive_threshold = i(
483 "curate.governance_archive_threshold",
484 GOVERNANCE_ARCHIVE_THRESHOLD,
485 )
486 .max(1);
487 self.negative_feedback_archive_threshold = i(
488 "curate.negative_feedback_archive_threshold",
489 NEGATIVE_FEEDBACK_ARCHIVE_THRESHOLD,
490 )
491 .max(1);
492 self.governance_evolve_threshold = i(
493 "evolve.governance_pending_threshold",
494 GOVERNANCE_EVOLVE_THRESHOLD,
495 )
496 .max(1);
497 self.governance_proposal_max_age_days =
498 i("curate.governance_proposal_max_age_days", 30).max(1);
499 self.failure_min_uses = i("curate.failure_min_uses", FAILURE_MIN_USES).max(1);
500 self.failure_max_success_rate =
501 f("curate.failure_max_success_rate", FAILURE_MAX_SUCCESS_RATE).clamp(0.0, 1.0);
502 self.failure_confidence_max =
503 f("curate.failure_confidence_max", FAILURE_CONFIDENCE_MAX).clamp(0.0, 1.0);
504 self.log_compact_days = i("curate.log_compact_days", LOG_COMPACT_DAYS).max(1);
505 let s = |k: &str, d: &str| -> String {
506 self.storage
507 .get_meta(k)
508 .ok()
509 .flatten()
510 .filter(|v| !v.trim().is_empty())
511 .unwrap_or_else(|| d.to_string())
512 };
513 self.appraise_tier_weak = f("appraise.tier_weak", APPRAISE_TIER_WEAK).clamp(0.0, 1.0);
514 self.appraise_tier_strong = f("appraise.tier_strong", APPRAISE_TIER_STRONG).clamp(0.0, 1.0);
515 self.appraise_min_strength =
516 f("appraise.min_strength", APPRAISE_MIN_STRENGTH).clamp(0.0, 1.0);
517 self.appraise_top = i("appraise.top", APPRAISE_TOP as i64).max(1) as usize;
518 self.appraise_trigger_hit_min =
519 f("appraise.trigger_hit_min", APPRAISE_TRIGGER_HIT_MIN).clamp(0.0, 1.0);
520 self.appraise_candidate_in_embed =
521 b("appraise.candidate_in_embed", APPRAISE_CANDIDATE_IN_EMBED);
522 self.appraise_signature_floor =
523 f("appraise.signature_floor", APPRAISE_SIGNATURE_FLOOR).clamp(0.0, 1.0);
524 self.appraise_min_evidence = i("appraise.min_evidence", APPRAISE_MIN_EVIDENCE).max(0);
525 self.appraise_conflict_ceiling =
526 f("appraise.conflict_ceiling", APPRAISE_CONFLICT_CEILING).clamp(0.0, 1.0);
527 self.intuition_prior_m = f("intuition.prior_m", INTUITION_PRIOR_M).max(0.0);
528 self.intuition_base_rate = f("intuition.base_rate", INTUITION_BASE_RATE).clamp(0.0, 1.0);
529 self.calibration_bins = i("intuition.calibration_bins", CALIBRATION_BINS).clamp(2, 100);
530 self.situation_coarse_keys = s("situation.coarse_keys", SITUATION_COARSE_KEYS);
531 Ok(())
532 }
533}
534
/// A candidate chunk paired with two similarity scores.
///
/// NOTE(review): not used in the visible part of the file; presumably built by the
/// `recall` submodule, with the two scores coming from the content and trigger
/// vector searches — confirm.
struct CandidateInfo {
    /// The chunk as a JSON value.
    chunk: Value,
    /// Similarity score for the content embedding.
    sim_content: f32,
    /// Similarity score for the trigger embedding.
    sim_trigger: f32,
}
544
545fn chunk_is_valid_for_recall(chunk: &Value, embed_version: i64) -> bool {
546 chunk.get("state").and_then(Value::as_str) != Some("archived")
547 && chunk.get("origin").and_then(Value::as_str) != Some("spark")
548 && chunk
549 .get("embed_version")
550 .and_then(Value::as_i64)
551 .unwrap_or(1)
552 >= embed_version
553}
554
/// Reduces `query` to a canonical bag-of-words string.
///
/// The query is lower-cased, every character that is neither alphanumeric nor
/// whitespace becomes a space, stop words are dropped, and the remaining words are
/// sorted, de-duplicated and joined with single spaces. Word order and repetition in
/// the input therefore do not affect the result.
fn normalize_query(query: &str) -> String {
    const STOP_WORDS: &[&str] = &[
        "a", "an", "and", "for", "in", "of", "on", "the", "to", "with",
    ];

    let lowered = query.to_lowercase();
    let mut cleaned = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        let keep = ch.is_alphanumeric() || ch.is_whitespace();
        cleaned.push(if keep { ch } else { ' ' });
    }

    let mut words: Vec<&str> = Vec::new();
    for word in cleaned.split_whitespace() {
        if !STOP_WORDS.contains(&word) {
            words.push(word);
        }
    }
    words.sort_unstable();
    words.dedup();
    words.join(" ")
}
586
587fn estimate_distill_prompt_tokens(log: &Value, related_logs: &[Value]) -> i64 {
588 let primary: i64 = [
589 "query",
590 "recall_snapshot",
591 "output",
592 "output_summary",
593 "nomination",
594 ]
595 .iter()
596 .filter_map(|key| log.get(*key).and_then(Value::as_str))
597 .map(|text| estimate_tokens(text) as i64)
598 .sum();
599 let log_id = log.get("id").and_then(Value::as_str).unwrap_or("");
600 let context_key = log.get("context_key").and_then(Value::as_str);
601 let related: i64 = related_logs
602 .iter()
603 .filter(|other| other.get("id").and_then(Value::as_str).unwrap_or("") != log_id)
604 .filter(|other| {
605 context_key.is_some() && other.get("context_key").and_then(Value::as_str) == context_key
606 })
607 .take(4)
608 .flat_map(|other| {
609 ["query", "output_summary", "outcome"]
610 .into_iter()
611 .filter_map(|key| other.get(key).and_then(Value::as_str))
612 })
613 .map(|text| estimate_tokens(text) as i64)
614 .sum();
615 primary + related
616}
617
618fn estimate_distilled_chunk_tokens(chunk: &DistilledChunk) -> i64 {
619 estimate_tokens(&chunk.content) as i64
620 + chunk
621 .trigger_desc
622 .as_deref()
623 .map(estimate_tokens)
624 .unwrap_or(0) as i64
625 + chunk
626 .anti_trigger_desc
627 .as_deref()
628 .map(estimate_tokens)
629 .unwrap_or(0) as i64
630}
631
/// Reports whether `query` contains any of the comma-separated phrases in `anti`.
///
/// Matching is a case-insensitive substring test; each phrase is trimmed first and
/// blank phrases are ignored.
fn anti_trigger_hit(query: &str, anti: &str) -> bool {
    let haystack = query.to_lowercase();
    let phrases = anti.to_lowercase();
    for raw in phrases.split(',') {
        let phrase = raw.trim();
        if phrase.is_empty() {
            continue;
        }
        if haystack.contains(phrase) {
            return true;
        }
    }
    false
}
639
640fn block_cost(block: &[Value]) -> usize {
641 block
642 .iter()
643 .map(|b| {
644 b.get("token_count")
645 .and_then(Value::as_u64)
646 .map(|t| t as usize)
647 .unwrap_or_else(|| {
648 estimate_tokens(b.get("content").and_then(Value::as_str).unwrap_or("")).max(100)
649 })
650 })
651 .sum()
652}
653
654fn limit_knowledge(knowledge: Vec<Value>, top: Option<usize>) -> Vec<Value> {
655 match top {
656 None => knowledge,
657 Some(0) => vec![],
658 Some(n) => knowledge.into_iter().take(n).collect(),
659 }
660}
661
/// Classifies a usage report: `"unknown"` when none was given, `"known_none"` when
/// the given list is empty, and `"known_some"` otherwise.
fn usage_state(used: Option<&[String]>) -> &'static str {
    let Some(ids) = used else {
        return "unknown";
    };
    if ids.is_empty() {
        "known_none"
    } else {
        "known_some"
    }
}
669
/// Returns `numerator / denominator` rounded to three decimal places, or `0.0` when
/// the denominator is zero or negative.
fn ratio(numerator: i64, denominator: i64) -> f64 {
    if denominator <= 0 {
        return 0.0;
    }
    let scaled = (numerator as f64 / denominator as f64) * 1000.0;
    scaled.round() / 1000.0
}
677
678fn validate_source(source: &str) -> Result<()> {
679 if !matches!(
680 source,
681 "mcp" | "sdk" | "cli" | "hook" | "daemon" | "augmented"
682 ) {
683 return Err(InnateError::InvalidState(format!(
684 "invalid event source: {source}"
685 )));
686 }
687 Ok(())
688}
689
690fn count_query(storage: &Storage, sql: &str) -> Result<i64> {
691 Ok(storage
692 .query_chunks(sql)?
693 .first()
694 .and_then(|r| r.as_object())
695 .and_then(|m| m.values().next())
696 .and_then(Value::as_i64)
697 .unwrap_or(0))
698}
699
700fn count_query_params<P: rusqlite::Params>(storage: &Storage, sql: &str, p: P) -> Result<i64> {
701 Ok(storage
702 .query_chunks_params(sql, p)?
703 .first()
704 .and_then(|r| r.as_object())
705 .and_then(|m| m.values().next())
706 .and_then(Value::as_i64)
707 .unwrap_or(0))
708}
709
710fn days_ago(now_iso: &str, days: i64) -> String {
711 use chrono::{DateTime, Duration, Utc};
712 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
713 let cutoff = t - Duration::days(days);
714 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
715 }
716 now_iso.to_string()
717}
718
719fn minutes_ago(now_iso: &str, minutes: i64) -> String {
720 use chrono::{DateTime, Duration, Utc};
721 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
722 let cutoff = t - Duration::minutes(minutes);
723 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
724 }
725 now_iso.to_string()
726}
727
728fn hours_ago(now_iso: &str, hours: i64) -> String {
729 use chrono::{DateTime, Duration, Utc};
730 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
731 let cutoff = t - Duration::hours(hours);
732 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
733 }
734 now_iso.to_string()
735}
736
737fn minutes_after(now_iso: &str, minutes: i64) -> String {
738 use chrono::{DateTime, Duration, Utc};
739 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
740 let cutoff = t + Duration::minutes(minutes);
741 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
742 }
743 now_iso.to_string()
744}
745
746fn hours_after(now_iso: &str, hours: i64) -> String {
747 use chrono::{DateTime, Duration, Utc};
748 if let Ok(t) = now_iso.parse::<DateTime<Utc>>() {
749 let cutoff = t + Duration::hours(hours);
750 return cutoff.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string();
751 }
752 now_iso.to_string()
753}
754
755fn iso_days_diff(now_iso: &str, past_iso: &str) -> i64 {
757 use chrono::{DateTime, Utc};
758 let parse = |s: &str| s.parse::<DateTime<Utc>>().ok();
759 if let (Some(a), Some(b)) = (parse(now_iso), parse(past_iso)) {
760 let diff = a - b;
761 diff.num_days().max(0)
762 } else {
763 0
764 }
765}
766
767fn iso_fractional_days(now_iso: &str, past_iso: &str) -> f64 {
770 use chrono::{DateTime, Utc};
771 let parse = |s: &str| s.parse::<DateTime<Utc>>().ok();
772 if let (Some(a), Some(b)) = (parse(now_iso), parse(past_iso)) {
773 ((a - b).num_seconds().max(0)) as f64 / 86_400.0
774 } else {
775 0.0
776 }
777}
778
779const ACTR_DECAY: f64 = 0.5;
781
782pub(super) fn actr_activation(used_count: i64, last_used_at: Option<&str>, now_iso: &str) -> f64 {
795 if used_count <= 0 {
796 return 0.0;
797 }
798 let Some(last) = last_used_at else {
799 return 0.0;
800 };
801 let recency_days = iso_fractional_days(now_iso, last);
802 let b = (1.0 + used_count as f64).ln() - ACTR_DECAY * (1.0 + recency_days).ln();
803 1.0 / (1.0 + (-b).exp())
804}
805
806fn detect_cycles(deps: &[Value]) -> Vec<Vec<String>> {
808 use std::collections::HashMap;
809 let mut adj: HashMap<String, Vec<String>> = HashMap::new();
810 for d in deps {
811 let src = d
812 .get("src")
813 .and_then(Value::as_str)
814 .unwrap_or("")
815 .to_string();
816 let dst = d
817 .get("dst")
818 .and_then(Value::as_str)
819 .unwrap_or("")
820 .to_string();
821 if !src.is_empty() && !dst.is_empty() {
822 adj.entry(src).or_default().push(dst);
823 }
824 }
825 let nodes: Vec<String> = adj.keys().cloned().collect();
826 let mut visited: HashSet<String> = HashSet::new();
827 let mut on_stack: HashSet<String> = HashSet::new();
828 let mut cycles: Vec<Vec<String>> = vec![];
829
830 fn dfs(
831 node: &str,
832 adj: &HashMap<String, Vec<String>>,
833 visited: &mut HashSet<String>,
834 on_stack: &mut HashSet<String>,
835 path: &mut Vec<String>,
836 cycles: &mut Vec<Vec<String>>,
837 ) {
838 if on_stack.contains(node) {
839 let start = path.iter().position(|n| n == node).unwrap_or(0);
841 cycles.push(path[start..].to_vec());
842 return;
843 }
844 if visited.contains(node) {
845 return;
846 }
847 visited.insert(node.to_string());
848 on_stack.insert(node.to_string());
849 path.push(node.to_string());
850 if let Some(children) = adj.get(node) {
851 for child in children {
852 dfs(child, adj, visited, on_stack, path, cycles);
853 }
854 }
855 path.pop();
856 on_stack.remove(node);
857 }
858
859 for node in nodes {
860 let mut path = vec![];
861 dfs(
862 &node,
863 &adj,
864 &mut visited,
865 &mut on_stack,
866 &mut path,
867 &mut cycles,
868 );
869 }
870 cycles
871}