1use anyhow::Result;
4use rusqlite::{params, Connection};
5use std::path::Path;
6
7#[derive(Debug, Clone, Default)]
11pub struct DecisionInput {
12 pub total_tools: usize,
13 pub failed_tools: usize,
14 pub replans: usize,
15 pub elapsed_ms: u64,
16 pub task_completed: bool,
17 pub task_description: Option<String>,
18 pub rules_used: Vec<String>,
19 pub tools_detail: Vec<ToolExecDetail>,
20}
21
22#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
23pub struct ToolExecDetail {
24 pub tool: String,
25 pub success: bool,
26}
27
/// User feedback attached to a decision; `Neutral` is the default.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum FeedbackSignal {
    /// The user explicitly approved the outcome.
    ExplicitPositive,
    /// The user explicitly rejected or corrected the outcome.
    ExplicitNegative,
    /// No explicit feedback was given.
    #[default]
    Neutral,
}
36
/// The three headline metrics used to judge whether evolution is helping.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct CoreMetrics {
    /// Fraction of decisions that completed without negative feedback.
    pub first_success_rate: f64,
    /// Mean number of replans per decision.
    pub avg_replans: f64,
    /// Negative feedback as a fraction of all explicit (positive + negative)
    /// feedback.
    pub user_correction_rate: f64,
}
43
/// Verdict produced when current metrics are compared against a baseline.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EvolutionJudgement {
    /// More metrics improved than regressed.
    Promote,
    /// No clear winner between improvements and regressions.
    KeepObserving,
    /// More metrics regressed than improved.
    Rollback,
}

impl EvolutionJudgement {
    /// Stable machine-readable identifier for the verdict.
    pub fn as_str(&self) -> &'static str {
        match *self {
            EvolutionJudgement::Rollback => "rollback",
            EvolutionJudgement::KeepObserving => "keep_observing",
            EvolutionJudgement::Promote => "promote",
        }
    }

    /// Chinese display label for the verdict.
    pub fn label_zh(&self) -> &'static str {
        match *self {
            EvolutionJudgement::Rollback => "回滚",
            EvolutionJudgement::KeepObserving => "继续观察",
            EvolutionJudgement::Promote => "保留",
        }
    }
}
68
69#[derive(Debug, Clone, PartialEq)]
70pub struct JudgementSummary {
71 pub judgement: EvolutionJudgement,
72 pub current: CoreMetrics,
73 pub baseline: Option<CoreMetrics>,
74 pub reason: String,
75}
76
/// One `evolution_log` row as returned by `query_rule_history`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RuleHistoryEntry {
    /// Timestamp text exactly as stored in the log.
    pub ts: String,
    /// Value of the log's `type` column.
    pub event_type: String,
    /// Value of the log's `version` column (empty string when NULL).
    pub txn_id: String,
    /// Value of the log's `reason` column (empty string when NULL).
    pub reason: String,
}
84
85impl FeedbackSignal {
86 pub fn as_str(&self) -> &'static str {
87 match self {
88 Self::ExplicitPositive => "pos",
89 Self::ExplicitNegative => "neg",
90 Self::Neutral => "neutral",
91 }
92 }
93}
94
95pub fn open_evolution_db(chat_root: &Path) -> Result<Connection> {
96 let db_path = chat_root.join("feedback.sqlite");
97 let conn = Connection::open(&db_path)?;
98 conn.execute_batch("PRAGMA foreign_keys=ON;")?;
99 ensure_evolution_tables(&conn)?;
100 Ok(conn)
101}
102pub fn ensure_evolution_tables(conn: &Connection) -> Result<()> {
105 conn.execute_batch(
106 r#"
107 CREATE TABLE IF NOT EXISTS decisions (
108 id INTEGER PRIMARY KEY AUTOINCREMENT,
109 ts TEXT NOT NULL DEFAULT (datetime('now')),
110 session_id TEXT,
111 total_tools INTEGER DEFAULT 0,
112 failed_tools INTEGER DEFAULT 0,
113 replans INTEGER DEFAULT 0,
114 elapsed_ms INTEGER DEFAULT 0,
115 task_completed BOOLEAN DEFAULT 0,
116 feedback TEXT DEFAULT 'neutral',
117 evolved BOOLEAN DEFAULT 0,
118 task_description TEXT,
119 tools_detail TEXT,
120 tool_sequence_key TEXT
121 );
122
123 CREATE TABLE IF NOT EXISTS decision_rules (
124 decision_id INTEGER REFERENCES decisions(id) ON DELETE CASCADE,
125 rule_id TEXT NOT NULL
126 );
127
128 CREATE TABLE IF NOT EXISTS evolution_log (
129 id INTEGER PRIMARY KEY AUTOINCREMENT,
130 ts TEXT NOT NULL DEFAULT (datetime('now')),
131 type TEXT NOT NULL,
132 target_id TEXT,
133 reason TEXT,
134 version TEXT
135 );
136
137 CREATE TABLE IF NOT EXISTS evolution_metrics (
138 date TEXT PRIMARY KEY,
139 first_success_rate REAL,
140 avg_replans REAL,
141 avg_tool_calls REAL,
142 user_correction_rate REAL,
143 evolved_rules INTEGER DEFAULT 0,
144 effective_rules INTEGER DEFAULT 0,
145 egl REAL DEFAULT 0.0
146 );
147
148 CREATE INDEX IF NOT EXISTS idx_decisions_evolved ON decisions(evolved);
149 CREATE INDEX IF NOT EXISTS idx_decisions_ts ON decisions(ts);
150 CREATE INDEX IF NOT EXISTS idx_dr_rule ON decision_rules(rule_id);
151 CREATE INDEX IF NOT EXISTS idx_dr_decision ON decision_rules(decision_id);
152 CREATE INDEX IF NOT EXISTS idx_evo_log_ts ON evolution_log(ts);
153 "#,
154 )?;
155 let _ = conn.execute(
157 "ALTER TABLE decisions ADD COLUMN tool_sequence_key TEXT",
158 [],
159 );
160 let _ = conn.execute(
162 "CREATE INDEX IF NOT EXISTS idx_decisions_seq ON decisions(tool_sequence_key)",
163 [],
164 );
165 Ok(())
166}
167
168pub fn compute_tool_sequence_key(tools_detail: &[ToolExecDetail]) -> Option<String> {
172 if tools_detail.is_empty() {
173 return None;
174 }
175 let key = tools_detail
176 .iter()
177 .take(3)
178 .map(|t| t.tool.as_str())
179 .collect::<Vec<_>>()
180 .join("→");
181 Some(key)
182}
183
184pub fn insert_decision(
187 conn: &Connection,
188 session_id: Option<&str>,
189 feedback: &DecisionInput,
190 user_feedback: FeedbackSignal,
191) -> Result<i64> {
192 let tools_detail_json = serde_json::to_string(&feedback.tools_detail).unwrap_or_default();
193 let tool_sequence_key = compute_tool_sequence_key(&feedback.tools_detail);
194
195 conn.execute(
196 "INSERT INTO decisions (session_id, total_tools, failed_tools, replans,
197 elapsed_ms, task_completed, feedback, task_description, tools_detail, tool_sequence_key)
198 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
199 params![
200 session_id,
201 feedback.total_tools as i64,
202 feedback.failed_tools as i64,
203 feedback.replans as i64,
204 feedback.elapsed_ms as i64,
205 feedback.task_completed,
206 user_feedback.as_str(),
207 feedback.task_description,
208 tools_detail_json,
209 tool_sequence_key,
210 ],
211 )?;
212 let decision_id = conn.last_insert_rowid();
213
214 if !feedback.rules_used.is_empty() {
215 let mut stmt =
216 conn.prepare("INSERT INTO decision_rules (decision_id, rule_id) VALUES (?1, ?2)")?;
217 for rule_id in &feedback.rules_used {
218 stmt.execute(params![decision_id, rule_id])?;
219 }
220 }
221
222 Ok(decision_id)
223}
224
225pub fn count_unprocessed_decisions(conn: &Connection) -> Result<i64> {
226 conn.query_row(
227 "SELECT COUNT(*) FROM decisions WHERE evolved = 0",
228 [],
229 |r| r.get(0),
230 )
231 .map_err(Into::into)
232}
233
234pub fn count_decisions_with_task_desc(conn: &Connection) -> Result<(i64, i64)> {
237 let total: i64 = conn.query_row(
238 "SELECT COUNT(*) FROM decisions WHERE evolved = 0",
239 [],
240 |r| r.get(0),
241 )?;
242 let with_desc: i64 = conn.query_row(
243 "SELECT COUNT(*) FROM decisions WHERE evolved = 0 AND task_description IS NOT NULL",
244 [],
245 |r| r.get(0),
246 )?;
247 Ok((total, with_desc))
248}
249
250pub fn update_last_decision_feedback(
251 conn: &Connection,
252 session_id: &str,
253 feedback: FeedbackSignal,
254) -> Result<()> {
255 conn.execute(
256 "UPDATE decisions SET feedback = ?1
257 WHERE id = (SELECT id FROM decisions WHERE session_id = ?2 ORDER BY ts DESC LIMIT 1)",
258 params![feedback.as_str(), session_id],
259 )?;
260 Ok(())
261}
262
263pub fn compute_effectiveness(conn: &Connection, rule_id: &str) -> Result<f32> {
266 let result: Result<(i64, i64), _> = conn.query_row(
267 "SELECT
268 COUNT(CASE WHEN d.task_completed = 1 AND d.feedback != 'neg' THEN 1 END),
269 COUNT(*)
270 FROM decisions d
271 JOIN decision_rules dr ON d.id = dr.decision_id
272 WHERE dr.rule_id = ?1 AND d.ts > datetime('now', '-30 days')",
273 params![rule_id],
274 |row| Ok((row.get(0)?, row.get(1)?)),
275 );
276 match result {
277 Ok((success, total)) => {
278 if total < 3 {
279 Ok(-1.0)
280 } else {
281 Ok(success as f32 / total as f32)
282 }
283 }
284 Err(_) => Ok(-1.0),
285 }
286}
287
288pub fn query_rule_history(conn: &Connection, rule_id: &str) -> Result<Vec<RuleHistoryEntry>> {
289 let mut stmt = conn.prepare(
290 "SELECT ts, type, COALESCE(version, ''), COALESCE(reason, '')
291 FROM evolution_log
292 WHERE target_id = ?1
293 ORDER BY ts DESC",
294 )?;
295
296 let rows = stmt.query_map(params![rule_id], |row| {
297 Ok(RuleHistoryEntry {
298 ts: row.get(0)?,
299 event_type: row.get(1)?,
300 txn_id: row.get(2)?,
301 reason: row.get(3)?,
302 })
303 })?;
304
305 let entries = rows.collect::<std::result::Result<Vec<_>, _>>()?;
306 Ok(entries)
307}
308
309pub fn update_daily_metrics(conn: &Connection) -> Result<()> {
312 let today = chrono::Utc::now().format("%Y-%m-%d").to_string();
313 let core = compute_core_metrics_for_date(conn, &today)?;
314
315 let avg_tool_calls: f64 = conn
316 .query_row(
317 "SELECT COALESCE(AVG(CAST(total_tools AS REAL)), 0.0)
318 FROM decisions
319 WHERE date(ts) = ?1 AND total_tools >= 1",
320 params![today],
321 |row| row.get(0),
322 )
323 .unwrap_or(0.0);
324 let egl = compute_egl(conn, &today).unwrap_or(0.0);
325
326 conn.execute(
327 "INSERT INTO evolution_metrics (date, first_success_rate, avg_replans,
328 avg_tool_calls, user_correction_rate, egl)
329 VALUES (?1, ?2, ?3, ?4, ?5, ?6)
330 ON CONFLICT(date) DO UPDATE SET
331 first_success_rate = ?2, avg_replans = ?3,
332 avg_tool_calls = ?4, user_correction_rate = ?5, egl = ?6",
333 params![
334 today,
335 core.first_success_rate,
336 core.avg_replans,
337 avg_tool_calls,
338 core.user_correction_rate,
339 egl
340 ],
341 )?;
342
343 Ok(())
344}
345
346pub fn compute_core_metrics_for_date(conn: &Connection, date: &str) -> Result<CoreMetrics> {
347 let (success_count, total_count): (i64, i64) = conn.query_row(
349 "SELECT
350 COUNT(CASE WHEN task_completed = 1 AND feedback != 'neg' THEN 1 END),
351 COUNT(*)
352 FROM decisions
353 WHERE date(ts) = ?1",
354 params![date],
355 |row| Ok((row.get(0)?, row.get(1)?)),
356 )?;
357 let first_success_rate = if total_count > 0 {
358 success_count as f64 / total_count as f64
359 } else {
360 0.0
361 };
362
363 let avg_replans: f64 = conn
365 .query_row(
366 "SELECT COALESCE(AVG(CAST(replans AS REAL)), 0.0) FROM decisions WHERE date(ts) = ?1",
367 params![date],
368 |row| row.get(0),
369 )
370 .unwrap_or(0.0);
371
372 let (pos_feedback_count, neg_feedback_count): (i64, i64) = conn.query_row(
374 "SELECT
375 COUNT(CASE WHEN feedback = 'pos' THEN 1 END),
376 COUNT(CASE WHEN feedback = 'neg' THEN 1 END)
377 FROM decisions
378 WHERE date(ts) = ?1",
379 params![date],
380 |row| Ok((row.get(0)?, row.get(1)?)),
381 )?;
382 let user_correction_rate = if (pos_feedback_count + neg_feedback_count) > 0 {
383 neg_feedback_count as f64 / (pos_feedback_count + neg_feedback_count) as f64
384 } else {
385 0.0
386 };
387
388 Ok(CoreMetrics {
389 first_success_rate,
390 avg_replans,
391 user_correction_rate,
392 })
393}
394
395pub fn compute_egl_for_rule(conn: &Connection, rule_id: &str) -> Result<f64> {
405 let (success_count, total_count, total_replans, pos_feedback, neg_feedback): (
406 i64,
407 i64,
408 i64,
409 i64,
410 i64,
411 ) = conn.query_row(
412 "SELECT
413 COUNT(CASE WHEN d.task_completed = 1 AND d.feedback != 'neg' THEN 1 END),
414 COUNT(*),
415 SUM(d.replans),
416 COUNT(CASE WHEN d.feedback = 'pos' THEN 1 END),
417 COUNT(CASE WHEN d.feedback = 'neg' THEN 1 END)
418 FROM decisions d
419 JOIN decision_rules dr ON d.id = dr.decision_id
420 WHERE dr.rule_id = ?1 AND d.ts > datetime('now', '-30 days')", params![rule_id],
422 |row| {
423 Ok((
424 row.get(0)?,
425 row.get(1)?,
426 row.get(2)?,
427 row.get(3)?,
428 row.get(4)?,
429 ))
430 },
431 )?;
432
433 if total_count == 0 {
434 return Ok(0.0);
435 }
436
437 let success_rate = success_count as f64 / total_count as f64;
438 let avg_replans = total_replans as f64 / total_count as f64;
439 let user_correction_rate = if (pos_feedback + neg_feedback) > 0 {
440 neg_feedback as f64 / (pos_feedback + neg_feedback) as f64
441 } else {
442 0.0
443 };
444
445 let w_success = 1.0;
447 let w_replans = 0.5;
448 let w_correction = 0.7;
449
450 let egl = (success_rate * w_success)
451 - (avg_replans * w_replans)
452 - (user_correction_rate * w_correction);
453 Ok(egl)
454}
455
456pub fn compute_egl(conn: &Connection, date: &str) -> Result<f64> {
458 let metrics = compute_core_metrics_for_date(conn, date)?;
459
460 let w_success = 1.0;
462 let w_replans = 0.5;
463 let w_correction = 0.7;
464
465 let egl = (metrics.first_success_rate * w_success)
466 - (metrics.avg_replans * w_replans)
467 - (metrics.user_correction_rate * w_correction);
468 Ok(egl)
469}
470
471pub fn fetch_latest_metrics(conn: &Connection) -> Result<Option<CoreMetrics>> {
472 let mut stmt = conn.prepare(
473 "SELECT first_success_rate, avg_replans, user_correction_rate
474 FROM evolution_metrics ORDER BY date DESC LIMIT 1",
475 )?;
476 let metrics = stmt.query_row([], |row| {
477 Ok(CoreMetrics {
478 first_success_rate: row.get(0)?,
479 avg_replans: row.get(1)?,
480 user_correction_rate: row.get(2)?,
481 })
482 });
483 match metrics {
484 Ok(m) => Ok(Some(m)),
485 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
486 Err(e) => Err(e.into()),
487 }
488}
489
490pub fn build_latest_judgement(conn: &Connection) -> Result<Option<JudgementSummary>> {
491 let today = chrono::Utc::now().format("%Y-%m-%d").to_string();
492 let current_metrics = compute_core_metrics_for_date(conn, &today)?;
493 let baseline_metrics = fetch_latest_metrics(conn)?;
494
495 if let Some(baseline) = baseline_metrics {
496 let mut reason_parts = vec![];
497 let mut promote_score = 0; let mut degrade_score = 0;
499
500 if current_metrics.first_success_rate > baseline.first_success_rate {
501 reason_parts.push(format!(
502 "首次成功率从 {:.2}% 提升到 {:.2}%",
503 baseline.first_success_rate * 100.0,
504 current_metrics.first_success_rate * 100.0
505 ));
506 promote_score += 1;
507 } else if current_metrics.first_success_rate < baseline.first_success_rate {
508 reason_parts.push(format!(
509 "首次成功率从 {:.2}% 下降到 {:.2}%",
510 baseline.first_success_rate * 100.0,
511 current_metrics.first_success_rate * 100.0
512 ));
513 degrade_score += 1;
514 }
515
516 if current_metrics.avg_replans < baseline.avg_replans {
517 reason_parts.push(format!(
518 "平均重试次数从 {:.2} 减少到 {:.2}",
519 baseline.avg_replans, current_metrics.avg_replans
520 ));
521 promote_score += 1;
522 } else if current_metrics.avg_replans > baseline.avg_replans {
523 reason_parts.push(format!(
524 "平均重试次数从 {:.2} 增加到 {:.2}",
525 baseline.avg_replans, current_metrics.avg_replans
526 ));
527 degrade_score += 1;
528 }
529
530 if current_metrics.user_correction_rate < baseline.user_correction_rate {
531 reason_parts.push(format!(
532 "用户修正率从 {:.2}% 减少到 {:.2}%",
533 baseline.user_correction_rate * 100.0,
534 current_metrics.user_correction_rate * 100.0
535 ));
536 promote_score += 1;
537 } else if current_metrics.user_correction_rate > baseline.user_correction_rate {
538 reason_parts.push(format!(
539 "用户修正率从 {:.2}% 增加到 {:.2}%",
540 baseline.user_correction_rate * 100.0,
541 current_metrics.user_correction_rate * 100.0
542 ));
543 degrade_score += 1;
544 }
545
546 let judgement = if promote_score > degrade_score && promote_score > 0 {
547 EvolutionJudgement::Promote
548 } else if degrade_score > promote_score && degrade_score > 0 {
549 EvolutionJudgement::Rollback
550 } else {
551 EvolutionJudgement::KeepObserving
552 };
553
554 let reason = if reason_parts.is_empty() {
555 "指标无显著变化".to_string()
556 } else {
557 reason_parts.join(",")
558 };
559
560 Ok(Some(JudgementSummary {
561 judgement,
562 current: current_metrics,
563 baseline: Some(baseline),
564 reason,
565 }))
566 } else {
567 Ok(Some(JudgementSummary {
569 judgement: EvolutionJudgement::KeepObserving,
570 current: current_metrics,
571 baseline: None,
572 reason: "无基线数据,继续观察".to_string(),
573 }))
574 }
575}
576
577pub fn log_evolution_event(
580 conn: &Connection,
581 event_type: &str,
582 target_id: Option<&str>,
583 reason: Option<&str>,
584 version: Option<&str>,
585) -> Result<i64> {
586 conn.execute(
587 "INSERT INTO evolution_log (type, target_id, reason, version) VALUES (?1, ?2, ?3, ?4)",
588 params![event_type, target_id, reason, version],
589 )?;
590 Ok(conn.last_insert_rowid())
591}
592
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Opens a fresh in-memory database with the evolution schema applied.
    fn setup_conn() -> Connection {
        let conn = Connection::open_in_memory().unwrap();
        ensure_evolution_tables(&conn).unwrap();
        conn
    }

    /// A completed single-tool decision with no rules and no tool details.
    fn sample_input() -> DecisionInput {
        DecisionInput {
            total_tools: 1,
            elapsed_ms: 100,
            task_completed: true,
            task_description: Some("test task".to_string()),
            ..DecisionInput::default()
        }
    }

    #[test]
    fn test_ensure_evolution_tables() {
        let conn = setup_conn();
        let tables = conn
            .query_row(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='decisions'",
                [],
                |r| r.get::<_, String>(0),
            )
            .unwrap();
        assert_eq!(tables, "decisions");
    }

    #[test]
    fn test_insert_decision() {
        let conn = setup_conn();
        let input = sample_input();
        let decision_id =
            insert_decision(&conn, Some("session1"), &input, FeedbackSignal::Neutral).unwrap();
        assert!(decision_id > 0);

        let count: i64 = conn
            .query_row("SELECT COUNT(*) FROM decisions", [], |r| r.get(0))
            .unwrap();
        assert_eq!(count, 1);
    }

    #[test]
    fn test_update_last_decision_feedback() {
        let conn = setup_conn();
        let input = sample_input();
        insert_decision(&conn, Some("s1"), &input, FeedbackSignal::Neutral).unwrap();
        update_last_decision_feedback(&conn, "s1", FeedbackSignal::ExplicitPositive).unwrap();

        let feedback: String = conn
            .query_row(
                "SELECT feedback FROM decisions WHERE session_id = 's1'",
                [],
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(feedback, "pos");
    }

    #[test]
    fn test_compute_effectiveness() {
        let conn = setup_conn();
        // Timestamps are relative to "now": compute_effectiveness only looks
        // at the last 30 days, so fixed calendar dates would drop out of the
        // window and make this test fail as time passes.
        conn.execute_batch(
            "INSERT INTO decisions (ts, task_completed, feedback) VALUES
                (datetime('now', '-4 hours'), 1, 'pos'),
                (datetime('now', '-3 hours'), 0, 'neg'),
                (datetime('now', '-2 hours'), 1, 'neutral'),
                (datetime('now', '-1 hours'), 1, 'pos');
             INSERT INTO decision_rules (decision_id, rule_id) VALUES
                (1, 'test-rule'), (2, 'test-rule'), (3, 'test-rule'), (4, 'test-rule');",
        )
        .unwrap();

        let effectiveness = compute_effectiveness(&conn, "test-rule").unwrap();
        assert!((effectiveness - 0.75).abs() < 1e-6);
    }

    #[test]
    fn test_compute_effectiveness_less_than_three_decisions() {
        let conn = setup_conn();
        // Relative timestamps keep both rows inside the 30-day window, so the
        // sentinel is produced by the sample-size check rather than by the
        // rows having aged out.
        conn.execute_batch(
            "INSERT INTO decisions (ts, task_completed, feedback) VALUES
                (datetime('now', '-2 hours'), 1, 'pos'),
                (datetime('now', '-1 hours'), 1, 'neutral');
             INSERT INTO decision_rules (decision_id, rule_id) VALUES
                (1, 'test-rule-2'), (2, 'test-rule-2');",
        )
        .unwrap();

        let effectiveness = compute_effectiveness(&conn, "test-rule-2").unwrap();
        assert!((effectiveness - -1.0).abs() < 1e-6);
    }

    #[test]
    fn test_query_rule_history_returns_events_for_rule() {
        let conn = setup_conn();
        conn.execute_batch(
            "INSERT INTO evolution_log (ts, type, target_id, reason, version) VALUES
                ('2026-03-14T09:00:00Z', 'rule_added', 'rule-a', 'seeded', 'txn-1'),
                ('2026-03-14T10:00:00Z', 'rule_promoted', 'rule-a', 'effective', 'txn-2'),
                ('2026-03-14T11:00:00Z', 'rule_added', 'rule-b', 'other', 'txn-3')",
        )
        .unwrap();

        let history = query_rule_history(&conn, "rule-a").unwrap();
        assert_eq!(history.len(), 2);
        assert_eq!(history[0].event_type, "rule_promoted");
        assert_eq!(history[0].txn_id, "txn-2");
        assert_eq!(history[0].reason, "effective");
        assert_eq!(history[1].event_type, "rule_added");
        assert_eq!(history[1].txn_id, "txn-1");
    }

    #[test]
    fn test_compute_tool_sequence_key() {
        let tools_detail = vec![
            ToolExecDetail {
                tool: "read_file".to_string(),
                success: true,
            },
            ToolExecDetail {
                tool: "write_file".to_string(),
                success: true,
            },
            ToolExecDetail {
                tool: "run_command".to_string(),
                success: false,
            },
            ToolExecDetail {
                tool: "http_request".to_string(),
                success: true,
            },
        ];
        // Only the first three tool names take part in the key.
        let key = compute_tool_sequence_key(&tools_detail);
        assert_eq!(key, Some("read_file→write_file→run_command".to_string()));

        let empty_detail: Vec<ToolExecDetail> = vec![];
        let key = compute_tool_sequence_key(&empty_detail);
        assert_eq!(key, None);

        let single_detail = vec![ToolExecDetail {
            tool: "list_directory".to_string(),
            success: true,
        }];
        let key = compute_tool_sequence_key(&single_detail);
        assert_eq!(key, Some("list_directory".to_string()));
    }

    #[test]
    fn test_insert_decision_with_rules() {
        let conn = setup_conn();
        let input = DecisionInput {
            total_tools: 2,
            failed_tools: 0,
            replans: 0,
            elapsed_ms: 100,
            task_completed: true,
            task_description: Some("test".to_string()),
            rules_used: vec!["rule-a".to_string(), "rule-b".to_string()],
            tools_detail: vec![ToolExecDetail {
                tool: "read_file".to_string(),
                success: true,
            }],
        };

        let id = insert_decision(&conn, Some("s1"), &input, FeedbackSignal::Neutral).unwrap();
        let mut stmt = conn
            .prepare("SELECT rule_id FROM decision_rules WHERE decision_id = ?1 ORDER BY rule_id")
            .unwrap();
        let rows: Vec<String> = stmt
            .query_map(params![id], |row| row.get(0))
            .unwrap()
            .collect::<std::result::Result<Vec<_>, _>>()
            .unwrap();

        assert_eq!(rows, vec!["rule-a".to_string(), "rule-b".to_string()]);
    }

    #[test]
    fn test_compute_core_metrics_for_date_uses_minimal_metrics() {
        let conn = setup_conn();
        conn.execute(
            "INSERT INTO decisions (ts, total_tools, replans, task_completed, feedback)
             VALUES
                ('2026-03-14 09:00:00', 1, 0, 1, 'neutral'),
                ('2026-03-14 10:00:00', 2, 1, 1, 'neg'),
                ('2026-03-14 11:00:00', 1, 2, 0, 'pos')",
            [],
        )
        .unwrap();

        // The date is passed explicitly, so this test does not depend on the
        // current time.
        let metrics = compute_core_metrics_for_date(&conn, "2026-03-14").unwrap();
        assert!((metrics.first_success_rate - (1.0 / 3.0)).abs() < 1e-6);
        assert!((metrics.avg_replans - 1.0).abs() < 1e-6);
        assert!((metrics.user_correction_rate - 0.5).abs() < 1e-6);
    }

    #[test]
    fn test_compute_core_metrics_for_date_avg_replans_and_user_correction_rate_extended() {
        let conn = setup_conn();
        conn.execute_batch(
            "INSERT INTO decisions (ts, total_tools, replans, task_completed, feedback)
             VALUES
                ('2026-03-15 09:00:00', 1, 0, 1, 'neutral'),
                ('2026-03-15 10:00:00', 2, 1, 1, 'neg'),
                ('2026-03-15 11:00:00', 1, 2, 0, 'pos'),
                ('2026-03-15 12:00:00', 3, 3, 1, 'neutral'),
                ('2026-03-15 13:00:00', 1, 0, 1, 'pos'),
                ('2026-03-15 14:00:00', 2, 1, 0, 'neg'),
                ('2026-03-15 15:00:00', 1, 0, 1, 'neutral')",
        )
        .unwrap();

        let metrics = compute_core_metrics_for_date(&conn, "2026-03-15").unwrap();
        // Replans sum to 7 over 7 rows; explicit feedback is 2 'neg' of 4.
        assert!((metrics.avg_replans - 1.0).abs() < 1e-6);
        assert!((metrics.user_correction_rate - 0.5).abs() < 1e-6);
    }

    #[test]
    fn test_build_latest_judgement_promotes_improving_metrics() {
        let conn = setup_conn();
        conn.execute(
            "INSERT INTO evolution_metrics (date, first_success_rate, avg_replans, avg_tool_calls, user_correction_rate, egl)
             VALUES
                ('2026-03-10', 0.40, 1.5, 3.0, 0.30, 0.0),
                ('2026-03-11', 0.50, 1.4, 3.0, 0.20, 0.0),
                ('2026-03-12', 0.55, 1.2, 3.0, 0.15, 0.0),
                ('2026-03-14', 0.72, 0.8, 2.5, 0.10, 0.0)",
            [],
        )
        .unwrap();

        // NOTE(review): no decisions are inserted, so the "current" metrics
        // are all zero and are compared with the newest stored row. Two of
        // the three metrics (replans, correction rate) then count as
        // improvements, which is what produces `Promote` here. Confirm this
        // is the scenario the test is meant to cover.
        let summary = build_latest_judgement(&conn).unwrap().unwrap();
        assert_eq!(summary.judgement, EvolutionJudgement::Promote);
    }
}