1use std::path::Path;
12
13use anyhow::Result;
14use rusqlite::Connection;
15
16use crate::feedback::open_evolution_db;
17use skilllite_core::planning::{PlanningRule, SourceEntry, SourceRegistry};
18
19use skilllite_fs::atomic_write;
20use crate::gatekeeper_l3_content;
22use crate::log_evolution_event;
23use crate::seed;
24use crate::EvolutionLlm;
25use crate::EvolutionMessage;
26
/// Weight given to the newest fetch outcome when `update_accessibility` folds it into a
/// source's exponentially averaged accessibility score.
const EMA_ALPHA: f32 = 0.3;
/// HTTP timeout, in seconds, used by `fetch_source` for sources whose region is `"cn"`.
const CN_TIMEOUT_SECS: u64 = 5;
/// HTTP timeout, in seconds, used by `fetch_source` for every other region.
const GLOBAL_TIMEOUT_SECS: u64 = 15;
/// Maximum number of prioritized sources fetched in a single external-learning run.
const MAX_FETCHES_PER_RUN: usize = 3;
/// Number of `external_fetch_run` events per day at which further runs are skipped.
const MAX_RUNS_PER_DAY: i64 = 3;
/// An enabled source is paused when its accessibility score is below this value ...
const PAUSE_ACCESSIBILITY_THRESHOLD: f32 = 0.15;
/// ... and it has accumulated at least this many failed fetches.
const PAUSE_MIN_FAIL_COUNT: u32 = 7;
/// A mutable source that contributed no rules is retired when its quality score is below
/// this value ...
const RETIRE_QUALITY_THRESHOLD: f32 = 0.20;
/// ... and it has been fetched (successes plus failures) at least this many times.
const RETIRE_MIN_FETCHES: u32 = 30;

/// Prompt template embedded at compile time. `extract_rules_from_content` fills in the
/// `{{domains}}`, `{{article_content}}` and `{{existing_rules_summary}}` placeholders.
const EXTERNAL_KNOWLEDGE_PROMPT: &str =
    include_str!("seed/evolution_prompts/external_knowledge.seed.md");
47
48pub fn should_run_external_learning(conn: &Connection) -> bool {
52 let enabled = std::env::var("SKILLLITE_EXTERNAL_LEARNING")
54 .ok()
55 .as_deref()
56 .map(|v| v == "1" || v == "true")
57 .unwrap_or(false);
58 if !enabled {
59 return false;
60 }
61
62 let runs_today: i64 = conn
64 .query_row(
65 "SELECT COUNT(*) FROM evolution_log
66 WHERE type = 'external_fetch_run' AND date(ts) = date('now')",
67 [],
68 |row| row.get(0),
69 )
70 .unwrap_or(0);
71
72 if runs_today >= MAX_RUNS_PER_DAY {
73 tracing::debug!(
74 "External learning daily cap reached ({}/{})",
75 runs_today,
76 MAX_RUNS_PER_DAY
77 );
78 return false;
79 }
80
81 true
82}
83
84fn prioritize_sources(sources: &[SourceEntry]) -> Vec<&SourceEntry> {
88 let mut enabled: Vec<&SourceEntry> = sources.iter().filter(|s| s.enabled).collect();
89
90 enabled.sort_by(|a, b| {
91 let region_ord = match (a.region.as_str(), b.region.as_str()) {
93 ("cn", "cn") | ("global", "global") => std::cmp::Ordering::Equal,
94 ("cn", _) => std::cmp::Ordering::Less,
95 (_, "cn") => std::cmp::Ordering::Greater,
96 _ => std::cmp::Ordering::Equal,
97 };
98 if region_ord != std::cmp::Ordering::Equal {
99 return region_ord;
100 }
101 let score_a = a.accessibility_score * a.quality_score;
103 let score_b = b.accessibility_score * b.quality_score;
104 score_b
105 .partial_cmp(&score_a)
106 .unwrap_or(std::cmp::Ordering::Equal)
107 });
108
109 enabled
110}
111
112fn update_accessibility(source: &mut SourceEntry, success: bool) {
116 let result = if success { 1.0_f32 } else { 0.0_f32 };
117 source.accessibility_score =
118 EMA_ALPHA * result + (1.0 - EMA_ALPHA) * source.accessibility_score;
119 if success {
120 source.fetch_success_count += 1;
121 } else {
122 source.fetch_fail_count += 1;
123 }
124 source.last_fetched = Some(chrono::Utc::now().to_rfc3339());
125}
126
127async fn fetch_source(source: &SourceEntry) -> Result<String> {
131 let timeout_secs = if source.region == "cn" {
132 CN_TIMEOUT_SECS
133 } else {
134 GLOBAL_TIMEOUT_SECS
135 };
136 let timeout = std::time::Duration::from_secs(timeout_secs);
137
138 let client = reqwest::Client::builder()
139 .timeout(timeout)
140 .user_agent("SkillLite/1.0 (external-learning)")
141 .build()?;
142
143 let response = if source.parser == "juejin" {
145 let body = serde_json::json!({
146 "id_type": 2,
147 "client_type": 2608,
148 "cursor": "0",
149 "limit": 20
150 });
151 client.post(&source.url).json(&body).send().await?
152 } else {
153 client.get(&source.url).send().await?
154 };
155
156 if !response.status().is_success() {
157 anyhow::bail!("HTTP {} from {}", response.status(), source.url);
158 }
159
160 Ok(response.text().await?)
161}
162
163fn parse_content(source: &SourceEntry, raw: &str) -> Vec<(String, String)> {
167 match source.parser.as_str() {
168 "juejin" => parse_juejin_json(raw),
169 "infoq_cn" => parse_infoq_json(raw),
170 "hn_algolia" => parse_hn_algolia_json(raw),
171 "rss_generic" => parse_rss(raw),
172 "github_trending_html" => parse_github_trending(raw),
173 _ => parse_rss(raw), }
175}
176
177fn parse_juejin_json(raw: &str) -> Vec<(String, String)> {
178 let Ok(v) = serde_json::from_str::<serde_json::Value>(raw) else {
179 return Vec::new();
180 };
181 let items = v["data"].as_array().cloned().unwrap_or_default();
182 items
183 .iter()
184 .take(10)
185 .filter_map(|item| {
186 let title = item["article_info"]["title"].as_str()?.to_string();
187 let brief = item["article_info"]["brief_content"]
188 .as_str()
189 .unwrap_or("")
190 .chars()
191 .take(120)
192 .collect::<String>();
193 Some((title, brief))
194 })
195 .collect()
196}
197
198fn parse_infoq_json(raw: &str) -> Vec<(String, String)> {
199 let Ok(v) = serde_json::from_str::<serde_json::Value>(raw) else {
200 return Vec::new();
201 };
202 let items = v["data"].as_array().cloned().unwrap_or_default();
203 items
204 .iter()
205 .take(10)
206 .filter_map(|item| {
207 let title = item["article"]["title"].as_str()?.to_string();
208 let summary = item["article"]["summary"]
209 .as_str()
210 .unwrap_or("")
211 .chars()
212 .take(120)
213 .collect::<String>();
214 Some((title, summary))
215 })
216 .collect()
217}
218
219fn parse_rss(raw: &str) -> Vec<(String, String)> {
220 let mut results = Vec::new();
222 let items: Vec<&str> = raw.split("<item>").skip(1).collect();
223 for item in items.iter().take(10) {
224 let title = extract_xml_tag(item, "title").unwrap_or_default();
225 let desc = extract_xml_tag(item, "description").unwrap_or_default();
226 let desc_clean = strip_html_basic(&desc)
228 .chars()
229 .take(120)
230 .collect::<String>();
231 if !title.is_empty() {
232 results.push((title, desc_clean));
233 }
234 }
235 results
236}
237
238fn parse_github_trending(raw: &str) -> Vec<(String, String)> {
239 let mut results = Vec::new();
241 let mut search = raw;
242 while let Some(start) = search.find("h2 class=\"h3 lh-condensed\"") {
243 search = &search[start + 26..];
244 if let Some(link_start) = search.find("<a href=\"/") {
245 let after = &search[link_start + 9..];
246 if let Some(end) = after.find('"') {
247 let repo_path = after[..end].to_string();
248 let desc = if let Some(p_start) = search.find("<p ") {
250 let p_content = &search[p_start..];
251 if let Some(close) = p_content.find("</p>") {
252 let inner = &p_content[..close];
253 strip_html_basic(inner).trim().chars().take(100).collect()
254 } else {
255 String::new()
256 }
257 } else {
258 String::new()
259 };
260 results.push((repo_path, desc));
261 if results.len() >= 10 {
262 break;
263 }
264 }
265 }
266 }
267 results
268}
269
270fn parse_hn_algolia_json(raw: &str) -> Vec<(String, String)> {
271 let Ok(v) = serde_json::from_str::<serde_json::Value>(raw) else {
272 return Vec::new();
273 };
274 let hits = v["hits"].as_array().cloned().unwrap_or_default();
275 hits.iter()
276 .take(10)
277 .filter_map(|hit| {
278 let title = hit["title"].as_str()?.to_string();
279 let url = hit["url"].as_str().unwrap_or("").to_string();
280 Some((title, url))
281 })
282 .collect()
283}
284
/// Returns the trimmed text content of the first `<tag ...>...</tag>` element in `text`.
///
/// This is a lightweight string scan, not an XML parser: it locates the first occurrence of
/// `<tag`, skips to the end of that opening tag, and takes everything up to the first
/// `</tag>`. The five predefined XML entities (plus the numeric `&#39;`) are decoded and
/// CDATA markers are removed.
///
/// Returns `None` when the opening tag, its closing `>` or the closing tag is missing.
fn extract_xml_tag(text: &str, tag: &str) -> Option<String> {
    let open = format!("<{tag}");
    let close = format!("</{tag}>");

    let start = text.find(&open)?;
    let content_start = start + text[start..].find('>')? + 1;
    let end = content_start + text[content_start..].find(&close)?;

    let unescaped = text[content_start..end]
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        .replace("&#39;", "'")
        // `&amp;` must be decoded last: decoding it first would turn `&amp;lt;` into `&lt;`
        // and then into `<`, unescaping the text twice.
        .replace("&amp;", "&")
        .replace("<![CDATA[", "")
        .replace("]]>", "");
    Some(unescaped.trim().to_string())
}
303
/// Removes everything between `<` and the next `>` from `html` and returns the remaining text.
///
/// This is a character-level filter, not an HTML parser: entities are left untouched and an
/// unterminated `<` swallows the rest of the input. A `>` that does not close a tag is kept
/// as ordinary text, so content such as `1 > 0` survives intact.
fn strip_html_basic(html: &str) -> String {
    let mut out = String::with_capacity(html.len());
    let mut in_tag = false;
    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            // Only a '>' that actually terminates a tag is markup.
            '>' if in_tag => in_tag = false,
            _ if !in_tag => out.push(ch),
            _ => {}
        }
    }
    out
}
317
318async fn extract_rules_from_content<L: EvolutionLlm>(
322 articles: &[(String, String)],
323 domains: &[String],
324 existing_summary: &str,
325 llm: &L,
326 model: &str,
327) -> Result<Vec<PlanningRule>> {
328 if articles.is_empty() {
329 return Ok(Vec::new());
330 }
331
332 let article_content = articles
334 .iter()
335 .enumerate()
336 .map(|(i, (title, snippet))| {
337 if snippet.is_empty() {
338 format!("{}. {}", i + 1, title)
339 } else {
340 format!("{}. {}\n {}", i + 1, title, snippet)
341 }
342 })
343 .collect::<Vec<_>>()
344 .join("\n");
345
346 let domains_str = domains.join(", ");
347
348 let prompt = EXTERNAL_KNOWLEDGE_PROMPT
349 .replace("{{domains}}", &domains_str)
350 .replace("{{article_content}}", &article_content)
351 .replace("{{existing_rules_summary}}", existing_summary);
352
353 let messages = vec![EvolutionMessage::user(&prompt)];
354 let content = llm
355 .complete(&messages, model, 0.3)
356 .await?
357 .trim()
358 .to_string();
359
360 if content.is_empty() {
361 return Ok(Vec::new());
362 }
363
364 parse_external_rule_response(&content)
365}
366
367fn parse_external_rule_response(content: &str) -> Result<Vec<PlanningRule>> {
368 let json_str = extract_json_array(content);
370
371 let arr: Vec<serde_json::Value> = serde_json::from_str(&json_str).map_err(|e| {
372 anyhow::anyhow!(
373 "Failed to parse external rule JSON: {}: raw={:.200}",
374 e,
375 content
376 )
377 })?;
378
379 let mut rules = Vec::new();
380 for val in arr {
381 let id = val["id"].as_str().unwrap_or("").to_string();
382 if id.is_empty() || !id.starts_with("ext_") {
383 tracing::warn!("External rule rejected: id '{}' must start with 'ext_'", id);
384 continue;
385 }
386 let instruction = val["instruction"].as_str().unwrap_or("").to_string();
387 if instruction.is_empty() || instruction.len() > 200 {
388 continue;
389 }
390 if let Err(e) = gatekeeper_l3_content(&instruction) {
392 tracing::warn!("L3 rejected external rule {}: {}", id, e);
393 continue;
394 }
395 let priority = val["priority"].as_u64().unwrap_or(50).clamp(45, 55) as u32;
396 let keywords: Vec<String> = val["keywords"]
397 .as_array()
398 .map(|a| {
399 a.iter()
400 .filter_map(|v| v.as_str().map(String::from))
401 .collect()
402 })
403 .unwrap_or_default();
404 let context_keywords: Vec<String> = val["context_keywords"]
405 .as_array()
406 .map(|a| {
407 a.iter()
408 .filter_map(|v| v.as_str().map(String::from))
409 .collect()
410 })
411 .unwrap_or_default();
412 let tool_hint = val["tool_hint"]
413 .as_str()
414 .filter(|s| !s.is_empty() && *s != "null")
415 .map(String::from);
416
417 rules.push(PlanningRule {
418 id,
419 priority,
420 keywords,
421 context_keywords,
422 tool_hint,
423 instruction,
424 mutable: true,
425 origin: "external".to_string(),
426 reusable: false,
427 effectiveness: None,
428 trigger_count: None,
429 });
430 }
431
432 Ok(rules)
433}
434
/// Isolates the JSON array embedded in an LLM reply.
///
/// Surrounding whitespace and Markdown code fences (```` ```json ```` / ```` ``` ````) are
/// stripped, then the span from the first `[` to the last `]` is returned. When no such
/// well-ordered pair exists the stripped text is returned unchanged, leaving the caller's
/// JSON parser to report the problem.
fn extract_json_array(content: &str) -> String {
    let stripped = content
        .trim()
        .trim_start_matches("```json")
        .trim_start_matches("```")
        .trim_end_matches("```")
        .trim();

    match (stripped.find('['), stripped.rfind(']')) {
        // The guard matters: with the last `]` before the first `[` (e.g. "] oops ["),
        // slicing `start..=end` would panic.
        (Some(start), Some(end)) if start < end => stripped[start..=end].to_string(),
        _ => stripped.to_string(),
    }
}
450
451fn evolve_sources(sources: &mut [SourceEntry]) -> Vec<(String, String)> {
455 let mut changes = Vec::new();
456 for source in sources.iter_mut() {
457 let total_fetches = source.fetch_success_count + source.fetch_fail_count;
459
460 if source.enabled
462 && source.accessibility_score < PAUSE_ACCESSIBILITY_THRESHOLD
463 && source.fetch_fail_count >= PAUSE_MIN_FAIL_COUNT
464 {
465 source.enabled = false;
466 tracing::info!(
467 "Pausing source {} (accessibility={:.2}, fails={})",
468 source.id,
469 source.accessibility_score,
470 source.fetch_fail_count
471 );
472 changes.push(("source_paused".to_string(), source.id.clone()));
473 }
474
475 if source.mutable
477 && source.quality_score < RETIRE_QUALITY_THRESHOLD
478 && source.rules_contributed == 0
479 && total_fetches >= RETIRE_MIN_FETCHES
480 {
481 source.enabled = false;
482 tracing::info!(
483 "Retiring source {} (quality={:.2})",
484 source.id,
485 source.quality_score
486 );
487 changes.push(("source_retired".to_string(), source.id.clone()));
488 }
489 }
490 changes
491}
492
493fn save_sources(chat_root: &Path, registry: &SourceRegistry) -> Result<()> {
497 let path = chat_root.join("prompts").join("sources.json");
498 if let Some(parent) = path.parent() {
499 std::fs::create_dir_all(parent)?;
500 }
501 let json = serde_json::to_string_pretty(registry)?;
502 atomic_write(&path, &json)?;
503 Ok(())
504}
505
506fn merge_external_rules(
508 chat_root: &Path,
509 new_rules: Vec<PlanningRule>,
510) -> Result<Vec<(String, String)>> {
511 if new_rules.is_empty() {
512 return Ok(Vec::new());
513 }
514
515 let rules_path = chat_root.join("prompts").join("rules.json");
516 let mut existing: Vec<PlanningRule> = if rules_path.exists() {
517 std::fs::read_to_string(&rules_path)
518 .ok()
519 .and_then(|s| serde_json::from_str(&s).ok())
520 .unwrap_or_default()
521 } else {
522 Vec::new()
523 };
524
525 let mut changes = Vec::new();
526 let available_slots = 50_usize.saturating_sub(existing.len());
528 for rule in new_rules.into_iter().take(available_slots) {
529 if existing.iter().any(|r| r.id == rule.id) {
530 continue;
531 }
532 changes.push(("external_rule_added".to_string(), rule.id.clone()));
533 existing.push(rule);
534 }
535
536 if !changes.is_empty() {
537 let json = serde_json::to_string_pretty(&existing)?;
538 atomic_write(&rules_path, &json)?;
539 }
540
541 Ok(changes)
542}
543
544pub fn apply_external_rule_promotions(
549 chat_root: &Path,
550 promotions: &[String], ) -> Result<Vec<(String, String)>> {
552 if promotions.is_empty() {
553 return Ok(Vec::new());
554 }
555 let rules_path = chat_root.join("prompts").join("rules.json");
556 if !rules_path.exists() {
557 return Ok(Vec::new());
558 }
559 let mut rules: Vec<PlanningRule> =
560 serde_json::from_str(&std::fs::read_to_string(&rules_path)?)?;
561 let mut changes = Vec::new();
562 for rule in rules.iter_mut() {
563 if promotions.contains(&rule.id) && rule.origin == "external" && rule.priority < 65 {
564 rule.priority = 65;
565 changes.push(("external_rule_promoted".to_string(), rule.id.clone()));
566 }
567 }
568 if !changes.is_empty() {
569 let json = serde_json::to_string_pretty(&rules)?;
570 atomic_write(&rules_path, &json)?;
571 }
572 Ok(changes)
573}
574
575pub async fn run_external_learning<L: EvolutionLlm>(
582 chat_root: &Path,
583 llm: &L,
584 model: &str,
585 txn_id: &str,
586) -> Result<Vec<(String, String)>> {
587 let should_run = {
589 let conn = open_evolution_db(chat_root)?;
590
591 should_run_external_learning(&conn) };
593 if !should_run {
594 return Ok(Vec::new());
595 }
596
597 tracing::info!("EVO-6: Starting external learning run (txn={})", txn_id);
598
599 let mut registry = seed::load_sources(chat_root);
601 let existing_rules = seed::load_rules(chat_root);
602 let existing_summary = existing_rules
603 .iter()
604 .map(|r| format!("- {}: {}", r.id, r.instruction))
605 .collect::<Vec<_>>()
606 .join("\n");
607
608 let prioritized = prioritize_sources(®istry.sources);
609 let to_fetch: Vec<SourceEntry> = prioritized
610 .into_iter()
611 .take(MAX_FETCHES_PER_RUN)
612 .cloned()
613 .collect();
614
615 let mut all_changes: Vec<(String, String)> = Vec::new();
616 let mut source_update_map: Vec<(String, bool, u32)> = Vec::new(); for source in &to_fetch {
620 tracing::debug!("EVO-6: Fetching source {} ({})", source.id, source.url);
621
622 let fetch_result = fetch_source(source).await;
623 let (success, raw) = match fetch_result {
624 Ok(content) if !content.is_empty() => (true, content),
625 Ok(_) => {
626 tracing::warn!("EVO-6: Empty response from {}", source.id);
627 (false, String::new())
628 }
629 Err(e) => {
630 tracing::warn!("EVO-6: Fetch failed for {}: {}", source.id, e);
631 (false, String::new())
632 }
633 };
634
635 if !success || raw.is_empty() {
636 source_update_map.push((source.id.clone(), false, 0));
637 continue;
638 }
639
640 let articles = parse_content(source, &raw);
642 if articles.is_empty() {
643 tracing::debug!("EVO-6: No articles parsed from {}", source.id);
644 source_update_map.push((source.id.clone(), true, 0));
645 continue;
646 }
647
648 let new_rules = match extract_rules_from_content(
650 &articles,
651 &source.domains,
652 &existing_summary,
653 llm,
654 model,
655 )
656 .await
657 {
658 Ok(rules) => rules,
659 Err(e) => {
660 tracing::warn!("EVO-6: Rule extraction failed for {}: {}", source.id, e);
661 Vec::new()
662 }
663 };
664
665 tracing::info!(
666 "EVO-6: Source {} → {} articles → {} candidate rules",
667 source.id,
668 articles.len(),
669 new_rules.len()
670 );
671
672 let rule_changes = merge_external_rules(chat_root, new_rules)?;
674 let rules_added = rule_changes.len() as u32;
675 all_changes.extend(rule_changes);
676 source_update_map.push((source.id.clone(), true, rules_added));
677 }
678
679 for (id, success, rules_added) in &source_update_map {
681 if let Some(src) = registry.sources.iter_mut().find(|s| s.id == *id) {
682 update_accessibility(src, *success);
683 src.rules_contributed += rules_added;
684 }
685 }
686
687 let conn = open_evolution_db(chat_root)?;
689 let _promoted: Vec<PlanningRule> = Vec::new(); let promotion_changes: Vec<(String, String)> = Vec::new(); all_changes.extend(promotion_changes);
692
693 let source_changes = evolve_sources(&mut registry.sources);
694 all_changes.extend(source_changes);
695
696 save_sources(chat_root, ®istry)?;
697
698 log_evolution_event(
700 &conn,
701 chat_root,
702 "external_fetch_run",
703 "",
704 &format!(
705 "{} sources fetched, {} changes",
706 to_fetch.len(),
707 all_changes.len()
708 ),
709 txn_id,
710 )?;
711 for (ctype, cid) in &all_changes {
712 log_evolution_event(&conn, chat_root, ctype, cid, "external learning", txn_id)?;
713 }
714
715 tracing::info!(
716 "EVO-6: External learning complete — {} changes",
717 all_changes.len()
718 );
719 Ok(all_changes)
720}
721
#[cfg(test)]
mod tests {
    use super::*;
    use crate::feedback;
    use skilllite_core::planning::{SourceEntry, SourceRegistry};

    /// Builds an enabled, mutable RSS source with zeroed counters and the given scores.
    fn make_source(id: &str, region: &str, accessibility: f32, quality: f32) -> SourceEntry {
        SourceEntry {
            id: id.to_string(),
            name: id.to_string(),
            url: format!("https://example.com/{}", id),
            source_type: "rss".to_string(),
            parser: "rss_generic".to_string(),
            region: region.to_string(),
            language: "zh".to_string(),
            domains: vec!["programming".to_string()],
            quality_score: quality,
            accessibility_score: accessibility,
            rules_contributed: 0,
            fetch_success_count: 0,
            fetch_fail_count: 0,
            last_fetched: None,
            mutable: true,
            origin: "seed".to_string(),
            enabled: true,
        }
    }

    /// "cn" sources sort before other regions; within a region the higher score wins.
    #[test]
    fn test_prioritize_sources_cn_first() {
        let sources = vec![
            make_source("global_a", "global", 0.9, 0.9),
            make_source("cn_b", "cn", 0.5, 0.5),
            make_source("cn_a", "cn", 0.9, 0.9),
        ];
        let registry = SourceRegistry {
            version: 1,
            sources,
        };
        let prioritized = prioritize_sources(&registry.sources);
        assert_eq!(prioritized[0].region, "cn");
        assert_eq!(prioritized[1].region, "cn");
        assert_eq!(prioritized[2].region, "global");
        assert_eq!(prioritized[0].id, "cn_a");
    }

    /// The accessibility score follows the EMA formula and the counters track outcomes.
    #[test]
    fn test_update_accessibility_ema() {
        let mut src = make_source("test", "cn", 0.8, 0.8);
        update_accessibility(&mut src, true);
        let expected = 0.3 * 1.0 + 0.7 * 0.8;
        assert!((src.accessibility_score - expected).abs() < 1e-5);
        assert_eq!(src.fetch_success_count, 1);

        update_accessibility(&mut src, false);
        let expected2 = 0.3 * 0.0 + 0.7 * expected;
        assert!((src.accessibility_score - expected2).abs() < 1e-5);
        assert_eq!(src.fetch_fail_count, 1);
    }

    /// Low accessibility combined with enough failures pauses the source.
    #[test]
    fn test_evolve_sources_pause_low_accessibility() {
        let mut sources = vec![{
            let mut s = make_source("low_access", "cn", 0.10, 0.70);
            s.fetch_fail_count = 8;
            s
        }];
        let changes = evolve_sources(&mut sources);
        assert!(!sources[0].enabled, "source should be paused");
        assert!(changes.iter().any(|(t, _)| t == "source_paused"));
    }

    /// A mutable, low-quality source with enough fetches and no rules is retired.
    #[test]
    fn test_evolve_sources_retire_mutable() {
        let mut sources = vec![{
            let mut s = make_source("low_quality", "cn", 0.9, 0.10);
            s.fetch_success_count = 25;
            s.fetch_fail_count = 10;
            s.rules_contributed = 0;
            s.mutable = true;
            s
        }];
        let changes = evolve_sources(&mut sources);
        assert!(!sources[0].enabled, "source should be retired");
        assert!(changes.iter().any(|(t, _)| t == "source_retired"));
    }

    /// Immutable sources are never retired, whatever their quality.
    #[test]
    fn test_evolve_sources_no_retire_immutable() {
        let mut sources = vec![{
            let mut s = make_source("seed_src", "cn", 0.9, 0.10);
            s.fetch_success_count = 25;
            s.fetch_fail_count = 10;
            s.rules_contributed = 0;
            s.mutable = false;
            s
        }];
        let changes = evolve_sources(&mut sources);
        assert!(!changes.iter().any(|(t, _)| t == "source_retired"));
    }

    /// Markdown code fences around the array are removed and the result is valid JSON.
    #[test]
    fn test_extract_json_array_with_fences() {
        let input = "```json\n[{\"id\": \"ext_test\"}]\n```";
        let result = extract_json_array(input);
        assert!(result.contains("ext_test"));
        let arr: Vec<serde_json::Value> =
            serde_json::from_str(&result).expect("extract_json_array output should be valid JSON");
        assert_eq!(arr.len(), 1);
    }

    /// A well-formed rule is accepted and tagged as a mutable external rule.
    #[test]
    fn test_parse_external_rule_response_valid() {
        let input = r#"[{"id":"ext_prefer_logging","priority":50,"keywords":["log","debug"],"context_keywords":[],"tool_hint":null,"instruction":"Always add structured logging before running commands."}]"#;
        let rules =
            parse_external_rule_response(input).expect("valid external rule JSON should parse");
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].id, "ext_prefer_logging");
        assert_eq!(rules[0].origin, "external");
        assert!(rules[0].mutable);
        assert_eq!(rules[0].priority, 50);
    }

    /// Rules whose id lacks the `ext_` prefix are dropped without failing the parse.
    #[test]
    fn test_parse_external_rule_response_bad_id_rejected() {
        let input = r#"[{"id":"bad_rule","priority":50,"keywords":["log"],"context_keywords":[],"tool_hint":null,"instruction":"Some instruction."}]"#;
        let rules = parse_external_rule_response(input)
            .expect("parse should succeed (empty rules for bad id)");
        assert!(rules.is_empty(), "non-ext_ id should be rejected");
    }

    /// Each `<item>` with a title becomes one article.
    #[test]
    fn test_parse_rss_basic() {
        let rss = r#"<?xml version="1.0"?>
<rss><channel>
<item><title>Test Article</title><description>Some content here</description></item>
<item><title>Another Article</title><description>More content</description></item>
</channel></rss>"#;
        let articles = parse_rss(rss);
        assert_eq!(articles.len(), 2);
        assert_eq!(articles[0].0, "Test Article");
    }

    /// Tags are removed while the text between them is kept.
    #[test]
    fn test_strip_html_basic() {
        let html = "<p>Hello <b>world</b>!</p>";
        assert_eq!(strip_html_basic(html), "Hello world!");
    }

    /// Without the opt-in environment variable, external learning never runs.
    #[test]
    fn test_should_run_env_disabled_by_default() {
        std::env::remove_var("SKILLLITE_EXTERNAL_LEARNING");
        let conn = Connection::open_in_memory().expect("in-memory DB should open");
        conn.execute_batch("PRAGMA foreign_keys=ON;")
            .expect("PRAGMA should succeed");
        feedback::ensure_evolution_tables(&conn).expect("tables should be created");
        assert!(!should_run_external_learning(&conn));
    }

    /// Merging the same rule twice adds it only once.
    #[test]
    fn test_merge_external_rules_no_duplicates() {
        let tmp = tempfile::TempDir::new().expect("temp dir should be created");
        let chat_root = tmp.path();
        seed::ensure_seed_data(chat_root);

        let new_rule = PlanningRule {
            id: "ext_test_rule".to_string(),
            priority: 50,
            keywords: vec!["test".to_string()],
            context_keywords: vec![],
            tool_hint: None,
            instruction: "Test external rule.".to_string(),
            mutable: true,
            origin: "external".to_string(),
            reusable: false,
            effectiveness: None,
            trigger_count: None,
        };

        let changes1 = merge_external_rules(chat_root, vec![new_rule.clone()])
            .expect("first merge should succeed");
        assert_eq!(changes1.len(), 1);
        assert_eq!(changes1[0].0, "external_rule_added");

        let changes2 = merge_external_rules(chat_root, vec![new_rule])
            .expect("second merge should succeed (no new rules)");
        assert!(
            changes2.is_empty(),
            "duplicate rule should not be added again"
        );
    }
}