1use super::*;
2
3impl KnowledgeBase {
4 pub fn inspect(&self) -> Result<Value> {
5 let total: i64 = count_query(
6 &self.storage,
7 "SELECT COUNT(*) FROM chunks WHERE origin!='spark'",
8 )?;
9 let active: i64 = count_query(
10 &self.storage,
11 "SELECT COUNT(*) FROM chunks WHERE state='active' AND origin!='spark'",
12 )?;
13 let pending: i64 = count_query(
14 &self.storage,
15 "SELECT COUNT(*) FROM chunks WHERE state='pending' AND origin!='spark'",
16 )?;
17 let archived: i64 = count_query(
18 &self.storage,
19 "SELECT COUNT(*) FROM chunks WHERE state='archived' AND origin!='spark'",
20 )?;
21 let sparks: i64 = count_query(
22 &self.storage,
23 "SELECT COUNT(*) FROM chunks WHERE origin='spark' AND state!='archived'",
24 )?;
25 let open_logs: i64 = count_query(
26 &self.storage,
27 "SELECT COUNT(*) FROM episodic_log WHERE distill_state='open'",
28 )?;
29 let new_logs: i64 = count_query(
30 &self.storage,
31 "SELECT COUNT(*) FROM episodic_log WHERE distill_state='new'",
32 )?;
33 let embed_rebuild: i64 = count_query(&self.storage,
34 "SELECT COUNT(*) FROM chunks WHERE embed_version=0 OR embed_version < (SELECT COALESCE(CAST(value AS INTEGER),1) FROM meta WHERE key='embed_version')")?;
35 let schema_version = self.storage.get_meta_or("schema_version", "?");
36 let lib_id = self.storage.get_meta_or("lib_id", "?");
37 let last_agg = self.storage.get_meta_or("last_agg_ts", "never");
38
39 let metric_window_start = days_ago(&utc_now_iso(), 30);
40 let trace_metrics = self.storage.query_chunks_params(
41 "SELECT COUNT(*) AS total,
42 SUM(CASE WHEN task_state='completed' THEN 1 ELSE 0 END) AS completed,
43 SUM(CASE WHEN task_state='timed_out' THEN 1 ELSE 0 END) AS timed_out,
44 SUM(CASE WHEN task_state='completed' AND usage_state!='unknown'
45 THEN 1 ELSE 0 END) AS usage_known,
46 SUM(CASE WHEN task_state='completed' AND usage_state='known_some'
47 THEN 1 ELSE 0 END) AS usage_some,
48 SUM(CASE WHEN task_state='completed'
49 AND outcome IN ('ok','fail')
50 THEN 1 ELSE 0 END) AS outcome_known,
51 SUM(CASE WHEN outcome='ok' THEN 1 ELSE 0 END) AS succeeded
52 FROM episodic_log WHERE ts >= ?",
53 rusqlite::params![metric_window_start],
54 )?;
55 let trace_row = trace_metrics.first();
56 let trace_total = trace_row
57 .and_then(|row| row.get("total"))
58 .and_then(Value::as_i64)
59 .unwrap_or(0);
60 let trace_completed = trace_row
61 .and_then(|row| row.get("completed"))
62 .and_then(Value::as_i64)
63 .unwrap_or(0);
64 let trace_timed_out = trace_row
65 .and_then(|row| row.get("timed_out"))
66 .and_then(Value::as_i64)
67 .unwrap_or(0);
68 let usage_known = trace_row
69 .and_then(|row| row.get("usage_known"))
70 .and_then(Value::as_i64)
71 .unwrap_or(0);
72 let usage_some = trace_row
73 .and_then(|row| row.get("usage_some"))
74 .and_then(Value::as_i64)
75 .unwrap_or(0);
76 let succeeded = trace_row
77 .and_then(|row| row.get("succeeded"))
78 .and_then(Value::as_i64)
79 .unwrap_or(0);
80 let outcome_known = trace_row
81 .and_then(|row| row.get("outcome_known"))
82 .and_then(Value::as_i64)
83 .unwrap_or(0);
84 let usage_rows = self.storage.query_chunks_params(
85 "SELECT recall_snapshot, used_ids FROM episodic_log
86 WHERE task_state='completed'
87 AND usage_state!='unknown' AND used_complete=1
88 AND recall_snapshot IS NOT NULL AND used_ids IS NOT NULL
89 AND ts >= ?",
90 rusqlite::params![metric_window_start],
91 )?;
92 let mut selected_total = 0_i64;
93 let mut selected_used = 0_i64;
94 for row in usage_rows {
95 let selected: HashSet<String> = row
96 .get("recall_snapshot")
97 .and_then(Value::as_str)
98 .and_then(|raw| serde_json::from_str::<Value>(raw).ok())
99 .and_then(|snapshot| snapshot.get("selected").cloned())
100 .and_then(|value| serde_json::from_value::<Vec<String>>(value).ok())
101 .unwrap_or_default()
102 .into_iter()
103 .collect();
104 let used: HashSet<String> = row
105 .get("used_ids")
106 .and_then(Value::as_str)
107 .and_then(|raw| serde_json::from_str::<Vec<String>>(raw).ok())
108 .unwrap_or_default()
109 .into_iter()
110 .collect();
111 selected_total += selected.len() as i64;
112 selected_used += selected.intersection(&used).count() as i64;
113 }
114 let feedback_count = count_query_params(
115 &self.storage,
116 "SELECT COUNT(*) FROM feedback_events WHERE ts >= ?",
117 rusqlite::params![metric_window_start],
118 )?;
119 let feedback_traces = count_query_params(
120 &self.storage,
121 "SELECT COUNT(DISTINCT f.trace_id)
122 FROM feedback_events f
123 JOIN episodic_log e ON e.trace_id=f.trace_id
124 WHERE f.ts >= ? AND e.ts >= ? AND e.task_state='completed'",
125 rusqlite::params![metric_window_start, metric_window_start],
126 )?;
127 let pending_evolve = count_query(
128 &self.storage,
129 "SELECT COUNT(*) FROM evolve_requests WHERE state IN ('pending','running')",
130 )?;
131 let governance_pending = count_query(
132 &self.storage,
133 "SELECT COUNT(*) FROM governance_proposals WHERE state='pending'",
134 )?;
135 let failed_evolve = count_query_params(
136 &self.storage,
137 "SELECT COUNT(*) FROM evolve_requests
138 WHERE last_failed_at >= ?",
139 rusqlite::params![metric_window_start],
140 )?;
141 let failed_distill = count_query_params(
142 &self.storage,
143 "SELECT COUNT(*) FROM episodic_log
144 WHERE distill_last_failed_at >= ?",
145 rusqlite::params![metric_window_start],
146 )?;
147 let confidence_buckets = self.storage.query_chunks(&format!(
148 "SELECT
149 SUM(CASE WHEN confidence < 0.25 THEN 1 ELSE 0 END) AS low,
150 SUM(CASE WHEN confidence >= 0.25 AND confidence < {0} THEN 1 ELSE 0 END) AS medium,
151 SUM(CASE WHEN confidence >= {0} THEN 1 ELSE 0 END) AS high
152 FROM chunks WHERE origin!='spark' AND state!='archived'",
153 self.promote_confidence_min
154 ))?;
155 let confidence_row = confidence_buckets.first();
156
157 let pending_oldest_ts = self.storage.query_chunks(
159 "SELECT MIN(created_at) AS oldest FROM chunks WHERE state='pending' AND origin!='spark'",
160 )?.into_iter().next()
161 .and_then(|r| r.get("oldest").cloned())
162 .and_then(|v| if v.is_null() { None } else { Some(v) });
163
164 let zombie_cutoff = days_ago(&utc_now_iso(), 14);
169 let zombie: i64 = count_query_params(
170 &self.storage,
171 "SELECT COUNT(*) FROM chunks
172 WHERE origin!='spark' AND state='active'
173 AND confidence >= 0.4 AND confidence <= 0.6
174 AND last_used_at IS NOT NULL
175 AND created_at < ?",
176 rusqlite::params![zombie_cutoff],
177 )?;
178 let debt_numerator = pending + zombie;
179 let debt_denominator = active.max(1);
180 let debt_ratio = debt_numerator as f64 / debt_denominator as f64;
181
182 let screening_cutoff = minutes_ago(&utc_now_iso(), self.screening_timeout_minutes);
184 let stale_screening: i64 = count_query_params(
185 &self.storage,
186 "SELECT COUNT(*) FROM episodic_log
187 WHERE distill_state='screening' AND distill_locked_at < ?",
188 rusqlite::params![screening_cutoff],
189 )?;
190
191 let distill_period_start = self.distill_token_period_start(&utc_now_iso())?;
193 let distill_cost = self.storage.query_chunks_params(
194 "SELECT COALESCE(SUM(prompt_tokens),0) AS pt,
195 COALESCE(SUM(completion_tokens),0) AS ct
196 FROM distill_token_usage
197 WHERE accounted_at >= ?",
198 rusqlite::params![distill_period_start],
199 )?;
200 let prompt_tokens = distill_cost
201 .first()
202 .and_then(|r| r.get("pt"))
203 .and_then(Value::as_i64)
204 .unwrap_or(0);
205 let completion_tokens = distill_cost
206 .first()
207 .and_then(|r| r.get("ct"))
208 .and_then(Value::as_i64)
209 .unwrap_or(0);
210
211 let spark_threshold: i64 = self
213 .storage
214 .get_meta("curate.soft_mature_threshold")
215 .ok()
216 .flatten()
217 .and_then(|v| v.parse::<i64>().ok())
218 .unwrap_or(5);
219 let recurring_sparks = self.storage.query_chunks_params(
220 "SELECT ut.chunk_id, COUNT(*) AS cnt,
221 c.content, c.trigger_desc, c.maturity
222 FROM usage_trace ut
223 JOIN chunks c ON c.id = ut.chunk_id
224 WHERE ut.event='retrieved'
225 AND c.origin='spark'
226 GROUP BY ut.chunk_id HAVING cnt >= ?",
227 rusqlite::params![spark_threshold],
228 )?;
229 let recurring_spark_ids: Vec<Value> = recurring_sparks
230 .iter()
231 .map(|r| {
232 json!({
233 "id": r.get("chunk_id").and_then(Value::as_str).unwrap_or(""),
234 "retrieved_count": r.get("cnt").and_then(Value::as_i64).unwrap_or(0),
235 "maturity": r.get("maturity").and_then(Value::as_str).unwrap_or(""),
236 "content_preview": r.get("content").and_then(Value::as_str).unwrap_or("")
237 .chars().take(80).collect::<String>(),
238 })
239 })
240 .collect();
241
242 let mut suggestions: Vec<Value> = Vec::new();
243 if embed_rebuild > 0 {
244 suggestions.push(json!({"action": "innate evolve --rebuild-embeddings", "reason": format!("{embed_rebuild} chunk(s) missing embeddings")}));
245 }
246 if new_logs > 0 {
247 suggestions.push(json!({"action": "innate evolve --trigger manual", "reason": format!("{new_logs} episodic log(s) ready to distill")}));
248 }
249 if pending > 0 {
250 suggestions.push(json!({"action": "innate approve <id> # or innate archive <id>", "reason": format!("{pending} pending chunk(s) awaiting review")}));
251 }
252 if !recurring_spark_ids.is_empty() {
253 suggestions.push(json!({"action": "innate promote-spark <id> --to note", "reason": format!("{} spark(s) recalled ≥{spark_threshold}× — consider promoting", recurring_spark_ids.len())}));
254 }
255 if stale_screening > 0 {
256 suggestions.push(json!({"action": "innate evolve --trigger manual", "reason": format!("{stale_screening} episodic log(s) stuck in screening")}));
257 }
258 if governance_pending > 0 {
259 suggestions.push(json!({
260 "action": "review governance_proposals",
261 "reason": format!("{governance_pending} chunk(s) have repeated negative feedback")
262 }));
263 }
264
265 let intuition = self.intuition_calibration(&metric_window_start)?;
268 let appraisals = intuition.get("appraisals").and_then(Value::as_i64).unwrap_or(0);
269 let mono_gap = intuition
270 .get("monotonicity_gap")
271 .and_then(Value::as_f64)
272 .unwrap_or(0.0);
273 let false_alarm = intuition
274 .get("false_alarm_rate")
275 .and_then(Value::as_f64)
276 .unwrap_or(0.0);
277 if appraisals >= 20 && mono_gap <= 0.0 {
278 suggestions.push(json!({
279 "action": "tune recall.w_* / situation.coarse_keys",
280 "reason": "appraise strength may be noise — strong tier does not beat weak on task_ok"
281 }));
282 }
283 if appraisals >= 20 && false_alarm >= 0.5 {
284 suggestions.push(json!({
285 "action": "review caution chunks / raise appraise.tier_strong",
286 "reason": format!("intuition false-alarm rate {false_alarm} — strong cautions often end ok")
287 }));
288 }
289
290 let usage_trace_total = count_query(&self.storage, "SELECT COUNT(*) FROM usage_trace")?;
294 let episodic_log_total = count_query(&self.storage, "SELECT COUNT(*) FROM episodic_log")?;
295 let page_count = count_query(&self.storage, "PRAGMA page_count")?;
296 let page_size = count_query(&self.storage, "PRAGMA page_size")?;
297 let db_size_bytes = page_count * page_size;
298
299 Ok(json!({
300 "schema_version": schema_version,
301 "lib_id": lib_id,
302 "last_agg_ts": last_agg,
303 "chunks": {
304 "total": total, "active": active, "pending": pending, "archived": archived,
305 "pending_oldest_ts": pending_oldest_ts,
306 },
307 "storage": {
308 "usage_trace_rows": usage_trace_total,
309 "episodic_log_rows": episodic_log_total,
310 "db_size_bytes": db_size_bytes,
311 "db_size_mb": (db_size_bytes as f64 / 1_048_576.0 * 100.0).round() / 100.0,
312 },
313 "sparks": sparks,
314 "episodic_log": {"open": open_logs, "new": new_logs},
315 "embed_rebuild_queue": embed_rebuild,
316 "knowledge_debt_ratio": (debt_ratio * 100.0).round() / 100.0,
317 "stale_screening_count": stale_screening,
318 "feedback_loop": {
319 "trace_completion_rate": ratio(trace_completed, trace_total),
320 "usage_annotation_rate": ratio(usage_known, trace_completed),
321 "trace_use_rate": ratio(usage_some, usage_known),
322 "selected_to_used_rate": ratio(selected_used, selected_total),
323 "task_success_rate": ratio(succeeded, outcome_known),
324 "feedback_coverage": ratio(feedback_traces, trace_completed),
325 "feedback_events": feedback_count,
326 "timed_out_traces": trace_timed_out,
327 "pending_evolve_requests": pending_evolve,
328 "failed_evolve_requests_30d": failed_evolve,
329 "failed_distill_logs_30d": failed_distill,
330 "pending_governance_proposals": governance_pending,
331 "window_days": 30,
332 "confidence_distribution": {
333 "low": confidence_row.and_then(|row| row.get("low")).and_then(Value::as_i64).unwrap_or(0),
334 "medium": confidence_row.and_then(|row| row.get("medium")).and_then(Value::as_i64).unwrap_or(0),
335 "high": confidence_row.and_then(|row| row.get("high")).and_then(Value::as_i64).unwrap_or(0),
336 }
337 },
338 "intuition_calibration": intuition,
339 "distill_cost_estimate": {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens},
340 "recurring_sparks": recurring_sparks.len(),
341 "recurring_spark_ids": recurring_spark_ids,
342 "params": {
343 "recall.w_content": self.w_content,
344 "recall.w_trigger": self.w_trigger,
345 "recall.w_context": self.w_context,
346 "recall.w_activation": self.w_activation,
347 "recall.top_k_candidates": self.top_k_candidates,
348 "curate.low_conf_threshold": self.low_conf_threshold,
349 "curate.low_conf_idle_days": self.low_conf_idle_days,
350 "curate.repeat_select_min": self.repeat_select_min,
351 "curate.never_used_age_days": self.never_used_age_days,
352 "curate.promote_used_success_min": self.promote_used_success_min,
353 "curate.promote_confidence_min": self.promote_confidence_min,
354 "curate.screening_timeout_minutes": self.screening_timeout_minutes,
355 "curate.open_ttl_days": self.open_ttl_days,
356 "curate.log_compact_days": self.log_compact_days,
357 "evolve.schedule_interval_hours": self.evolve_schedule_interval_hours,
358 },
359 "suggestions": suggestions
360 }))
361 }
362
363 fn intuition_calibration(&self, window_start: &str) -> Result<Value> {
373 let rows = self.storage.query_chunks_params(
374 "SELECT recall_snapshot, outcome FROM episodic_log
375 WHERE ts >= ? AND recall_snapshot LIKE '%\"appraise\"%'",
376 rusqlite::params![window_start],
377 )?;
378
379 let mut buckets: std::collections::BTreeMap<String, [f64; 4]> =
381 std::collections::BTreeMap::new();
382 for tier in ["weak", "medium", "strong"] {
383 buckets.insert(tier.to_string(), [0.0; 4]);
384 }
385 let mut total = 0_i64;
386 let mut silent = 0_i64;
387 let mut caution_strong = 0_i64;
388 let mut caution_strong_false = 0_i64;
389
390 for row in &rows {
391 let snapshot = row
392 .get("recall_snapshot")
393 .and_then(Value::as_str)
394 .and_then(|raw| serde_json::from_str::<Value>(raw).ok());
395 let Some(appraise) = snapshot.as_ref().and_then(|s| s.get("appraise")) else {
396 continue;
397 };
398 let tier = appraise
399 .get("tier")
400 .and_then(Value::as_str)
401 .unwrap_or("weak");
402 let valence = appraise
403 .get("valence")
404 .and_then(Value::as_str)
405 .unwrap_or("neutral");
406 let strength = appraise
407 .get("strength")
408 .and_then(Value::as_f64)
409 .unwrap_or(0.0);
410 let outcome = row.get("outcome").and_then(Value::as_str);
411
412 total += 1;
413 if tier == "weak" || valence == "neutral" {
414 silent += 1;
415 }
416 let has_outcome = matches!(outcome, Some("ok") | Some("fail"));
417 let is_ok = outcome == Some("ok");
418 if let Some(b) = buckets.get_mut(tier) {
419 b[0] += 1.0;
420 if has_outcome {
421 b[1] += 1.0;
422 b[3] += strength;
423 if is_ok {
424 b[2] += 1.0;
425 }
426 }
427 }
428 if valence == "caution" && tier == "strong" && has_outcome {
429 caution_strong += 1;
430 if is_ok {
431 caution_strong_false += 1;
432 }
433 }
434 }
435
436 let hit_rate = |b: &[f64; 4]| if b[1] > 0.0 { b[2] / b[1] } else { 0.0 };
437 let weak = buckets.get("weak").copied().unwrap_or([0.0; 4]);
438 let strong = buckets.get("strong").copied().unwrap_or([0.0; 4]);
439 let monotonicity_gap = hit_rate(&strong) - hit_rate(&weak);
440
441 let outcome_total: f64 = buckets.values().map(|b| b[1]).sum();
443 let ece = if outcome_total > 0.0 {
444 buckets
445 .values()
446 .filter(|b| b[1] > 0.0)
447 .map(|b| {
448 let avg_strength = b[3] / b[1];
449 (b[1] / outcome_total) * (avg_strength - hit_rate(b)).abs()
450 })
451 .sum::<f64>()
452 } else {
453 0.0
454 };
455
456 let bucket_detail: Vec<Value> = ["weak", "medium", "strong"]
457 .iter()
458 .map(|tier| {
459 let b = buckets.get(*tier).copied().unwrap_or([0.0; 4]);
460 json!({
461 "tier": tier,
462 "n": b[0] as i64,
463 "n_with_outcome": b[1] as i64,
464 "avg_strength": if b[1] > 0.0 { (b[3] / b[1] * 1000.0).round() / 1000.0 } else { 0.0 },
465 "actual_hit_rate": (hit_rate(&b) * 1000.0).round() / 1000.0,
466 })
467 })
468 .collect();
469
470 Ok(json!({
471 "appraisals": total,
472 "monotonicity_gap": (monotonicity_gap * 1000.0).round() / 1000.0,
473 "ece": (ece * 1000.0).round() / 1000.0,
474 "false_alarm_rate": ratio(caution_strong_false, caution_strong),
475 "silence_rate": ratio(silent, total),
476 "buckets": bucket_detail,
477 }))
478 }
479
480 pub fn rebuild_embeddings(&self) -> Result<usize> {
485 let meta_version = self
486 .storage
487 .get_meta("embed_version")?
488 .and_then(|v| v.parse::<i64>().ok())
489 .unwrap_or(1);
490 let stale = self.storage.query_chunks_params(
492 "SELECT id, content, trigger_desc, state_reason FROM chunks
493 WHERE embed_version = 0 OR embed_version < ?",
494 rusqlite::params![meta_version],
495 )?;
496 self.storage.invalidate_vector_caches();
500 let mut count = 0;
501 for row in &stale {
502 let id = match row.get("id").and_then(Value::as_str) {
503 Some(v) => v,
504 None => continue,
505 };
506 let content = row.get("content").and_then(Value::as_str).unwrap_or("");
507 let trigger = row
508 .get("trigger_desc")
509 .and_then(Value::as_str)
510 .unwrap_or(content);
511 let state_reason = row
512 .get("state_reason")
513 .and_then(Value::as_str)
514 .unwrap_or("");
515
516 let cvec = match self.embedding.embed_content(content) {
517 Ok(v) => v,
518 Err(_) => continue,
519 };
520 let tvec = match self.embedding.embed_trigger(trigger) {
521 Ok(v) => v,
522 Err(_) => continue,
523 };
524
525 self.storage.begin_immediate()?;
526 let r = (|| -> Result<()> {
527 self.store_vec_content(id, &cvec)?;
528 self.store_vec_trigger(id, &tvec)?;
529 let new_reason = if state_reason.starts_with("embedding_pending:target=") {
531 let target_state = state_reason.trim_start_matches("embedding_pending:target=");
532 let now = utc_now_iso();
533 self.storage.update_chunk_state(
534 id,
535 target_state,
536 Some("embedding_rebuilt"),
537 &now,
538 )?;
539 "embedding_rebuilt".to_string()
540 } else {
541 "embedding_rebuilt".to_string()
542 };
543 let now = utc_now_iso();
544 self.storage.conn_execute(
545 "UPDATE chunks SET embed_version=?, state_reason=?, updated_at=? WHERE id=?",
546 rusqlite::params![meta_version, new_reason, now, id],
547 )?;
548 self.storage.commit()
549 })();
550 if r.is_err() {
551 let _ = self.storage.rollback();
552 } else {
553 count += 1;
554 }
555 }
556 Ok(count)
557 }
558
559 pub fn inspect_id(&self, id: &str) -> Result<Value> {
564 if let Some(chunk) = self.storage.get_chunk(id)? {
566 let traces = self.storage.query_chunks_params(
567 "SELECT * FROM usage_trace WHERE chunk_id=? ORDER BY ts DESC LIMIT 20",
568 rusqlite::params![id],
569 )?;
570 let derived = self.storage.query_chunks_params(
571 "SELECT id, state, confidence FROM chunks WHERE distilled_from IN (
572 SELECT id FROM episodic_log WHERE trace_id IN (
573 SELECT trace_id FROM usage_trace WHERE chunk_id=?
574 )
575 ) LIMIT 10",
576 rusqlite::params![id],
577 )?;
578 return Ok(json!({
579 "kind": "chunk",
580 "chunk": chunk,
581 "recent_traces": traces,
582 "derived_chunks": derived,
583 }));
584 }
585 if let Some(log) = self.storage.get_episodic_log(id)? {
587 let traces = self.storage.query_chunks_params(
588 "SELECT * FROM usage_trace WHERE trace_id=? ORDER BY ts ASC",
589 rusqlite::params![id],
590 )?;
591 return Ok(json!({
592 "kind": "trace",
593 "episodic_log": log,
594 "usage_traces": traces,
595 }));
596 }
597 Err(InnateError::ChunkNotFound(id.to_string()))
598 }
599
600 pub(super) fn sanitize_content(&self, content: &str) -> (String, SanitizeAction) {
605 self.sanitizer.sanitize(content)
606 }
607}