Skip to main content

skilllite_evolution/
external_learner.rs

1//! EVO-6: External knowledge learning.
2//!
3//! Fetches tech articles from external sources (CN-priority), extracts
4//! planning rules via LLM, and evolves the source registry itself
5//! (pause/retire low-quality sources, update accessibility scores).
6//!
//! Gated by env var: `SKILLLITE_EXTERNAL_LEARNING=1` (or `true`); default OFF.
8//! Daily cap: max 3 external fetch runs per day.
9//! Network: CN sources use 5s timeout, global sources use 15s timeout.
10
11use std::path::Path;
12
13use anyhow::Result;
14use rusqlite::Connection;
15
16use crate::feedback::open_evolution_db;
17use skilllite_core::planning::{PlanningRule, SourceEntry, SourceRegistry};
18
19use skilllite_fs::atomic_write;
20// use crate::feedback; // unused import, commented out
21use crate::gatekeeper_l3_content;
22use crate::log_evolution_event;
23use crate::seed;
24use crate::EvolutionLlm;
25use crate::EvolutionMessage;
26
27// ─── Configuration constants ─────────────────────────────────────────────────
28
/// Smoothing factor (α) of the exponential moving average applied to a
/// source's accessibility score: `new = α×result + (1-α)×old`.
const EMA_ALPHA: f32 = 0.3;
/// HTTP request timeout, in seconds, for sources whose region is `"cn"`.
const CN_TIMEOUT_SECS: u64 = 5;
/// HTTP request timeout, in seconds, for every source that is not `"cn"`.
const GLOBAL_TIMEOUT_SECS: u64 = 15;
/// Max sources to fetch per run (keep total time bounded).
const MAX_FETCHES_PER_RUN: usize = 3;
/// Max external learning runs per calendar day.
const MAX_RUNS_PER_DAY: i64 = 3;
/// Accessibility threshold below which a source is paused.
const PAUSE_ACCESSIBILITY_THRESHOLD: f32 = 0.15;
/// Minimum fail count before pausing (avoid reacting to transient errors).
const PAUSE_MIN_FAIL_COUNT: u32 = 7;
/// Quality threshold below which a mutable source may be retired.
const RETIRE_QUALITY_THRESHOLD: f32 = 0.20;
/// Minimum fetch attempts before retirement eligibility.
const RETIRE_MIN_FETCHES: u32 = 30;

/// Prompt template for LLM rule extraction, embedded at compile time.
/// Its `{{domains}}`, `{{article_content}}` and `{{existing_rules_summary}}`
/// placeholders are substituted before the prompt is sent.
const EXTERNAL_KNOWLEDGE_PROMPT: &str =
    include_str!("seed/evolution_prompts/external_knowledge.seed.md");
47
48// ─── Guard: should we run? ────────────────────────────────────────────────────
49
50/// Check whether external learning is enabled and under the daily cap.
51pub fn should_run_external_learning(conn: &Connection) -> bool {
52    // Env guard (opt-in)
53    let enabled = std::env::var("SKILLLITE_EXTERNAL_LEARNING")
54        .ok()
55        .as_deref()
56        .map(|v| v == "1" || v == "true")
57        .unwrap_or(false);
58    if !enabled {
59        return false;
60    }
61
62    // Daily cap
63    let runs_today: i64 = conn
64        .query_row(
65            "SELECT COUNT(*) FROM evolution_log
66             WHERE type = 'external_fetch_run' AND date(ts) = date('now')",
67            [],
68            |row| row.get(0),
69        )
70        .unwrap_or(0);
71
72    if runs_today >= MAX_RUNS_PER_DAY {
73        tracing::debug!(
74            "External learning daily cap reached ({}/{})",
75            runs_today,
76            MAX_RUNS_PER_DAY
77        );
78        return false;
79    }
80
81    true
82}
83
84// ─── Source prioritization ────────────────────────────────────────────────────
85
86/// Sort sources: CN region first, then by accessibility_score × quality_score descending.
87fn prioritize_sources(sources: &[SourceEntry]) -> Vec<&SourceEntry> {
88    let mut enabled: Vec<&SourceEntry> = sources.iter().filter(|s| s.enabled).collect();
89
90    enabled.sort_by(|a, b| {
91        // CN sources always before global
92        let region_ord = match (a.region.as_str(), b.region.as_str()) {
93            ("cn", "cn") | ("global", "global") => std::cmp::Ordering::Equal,
94            ("cn", _) => std::cmp::Ordering::Less,
95            (_, "cn") => std::cmp::Ordering::Greater,
96            _ => std::cmp::Ordering::Equal,
97        };
98        if region_ord != std::cmp::Ordering::Equal {
99            return region_ord;
100        }
101        // Within same region: sort by composite score descending
102        let score_a = a.accessibility_score * a.quality_score;
103        let score_b = b.accessibility_score * b.quality_score;
104        score_b
105            .partial_cmp(&score_a)
106            .unwrap_or(std::cmp::Ordering::Equal)
107    });
108
109    enabled
110}
111
112// ─── EMA accessibility update ─────────────────────────────────────────────────
113
114/// Update accessibility score with EMA: new = α×result + (1-α)×old
115fn update_accessibility(source: &mut SourceEntry, success: bool) {
116    let result = if success { 1.0_f32 } else { 0.0_f32 };
117    source.accessibility_score =
118        EMA_ALPHA * result + (1.0 - EMA_ALPHA) * source.accessibility_score;
119    if success {
120        source.fetch_success_count += 1;
121    } else {
122        source.fetch_fail_count += 1;
123    }
124    source.last_fetched = Some(chrono::Utc::now().to_rfc3339());
125}
126
127// ─── HTTP fetch ───────────────────────────────────────────────────────────────
128
129/// Fetch raw content from a source. Returns Ok(raw_bytes) or Err.
130async fn fetch_source(source: &SourceEntry) -> Result<String> {
131    let timeout_secs = if source.region == "cn" {
132        CN_TIMEOUT_SECS
133    } else {
134        GLOBAL_TIMEOUT_SECS
135    };
136    let timeout = std::time::Duration::from_secs(timeout_secs);
137
138    let client = reqwest::Client::builder()
139        .timeout(timeout)
140        .user_agent("SkillLite/1.0 (external-learning)")
141        .build()?;
142
143    // Special handling for sources that require POST
144    let response = if source.parser == "juejin" {
145        let body = serde_json::json!({
146            "id_type": 2,
147            "client_type": 2608,
148            "cursor": "0",
149            "limit": 20
150        });
151        client.post(&source.url).json(&body).send().await?
152    } else {
153        client.get(&source.url).send().await?
154    };
155
156    if !response.status().is_success() {
157        anyhow::bail!("HTTP {} from {}", response.status(), source.url);
158    }
159
160    Ok(response.text().await?)
161}
162
163// ─── Content parsers ──────────────────────────────────────────────────────────
164
165/// Parse raw content into a list of (title, snippet) pairs.
166fn parse_content(source: &SourceEntry, raw: &str) -> Vec<(String, String)> {
167    match source.parser.as_str() {
168        "juejin" => parse_juejin_json(raw),
169        "infoq_cn" => parse_infoq_json(raw),
170        "hn_algolia" => parse_hn_algolia_json(raw),
171        "rss_generic" => parse_rss(raw),
172        "github_trending_html" => parse_github_trending(raw),
173        _ => parse_rss(raw), // fallback
174    }
175}
176
177fn parse_juejin_json(raw: &str) -> Vec<(String, String)> {
178    let Ok(v) = serde_json::from_str::<serde_json::Value>(raw) else {
179        return Vec::new();
180    };
181    let items = v["data"].as_array().cloned().unwrap_or_default();
182    items
183        .iter()
184        .take(10)
185        .filter_map(|item| {
186            let title = item["article_info"]["title"].as_str()?.to_string();
187            let brief = item["article_info"]["brief_content"]
188                .as_str()
189                .unwrap_or("")
190                .chars()
191                .take(120)
192                .collect::<String>();
193            Some((title, brief))
194        })
195        .collect()
196}
197
198fn parse_infoq_json(raw: &str) -> Vec<(String, String)> {
199    let Ok(v) = serde_json::from_str::<serde_json::Value>(raw) else {
200        return Vec::new();
201    };
202    let items = v["data"].as_array().cloned().unwrap_or_default();
203    items
204        .iter()
205        .take(10)
206        .filter_map(|item| {
207            let title = item["article"]["title"].as_str()?.to_string();
208            let summary = item["article"]["summary"]
209                .as_str()
210                .unwrap_or("")
211                .chars()
212                .take(120)
213                .collect::<String>();
214            Some((title, summary))
215        })
216        .collect()
217}
218
219fn parse_rss(raw: &str) -> Vec<(String, String)> {
220    // Minimal RSS parser: extract <title> and <description> from <item> blocks.
221    let mut results = Vec::new();
222    let items: Vec<&str> = raw.split("<item>").skip(1).collect();
223    for item in items.iter().take(10) {
224        let title = extract_xml_tag(item, "title").unwrap_or_default();
225        let desc = extract_xml_tag(item, "description").unwrap_or_default();
226        // Strip basic HTML tags from description
227        let desc_clean = strip_html_basic(&desc)
228            .chars()
229            .take(120)
230            .collect::<String>();
231        if !title.is_empty() {
232            results.push((title, desc_clean));
233        }
234    }
235    results
236}
237
238fn parse_github_trending(raw: &str) -> Vec<(String, String)> {
239    // Extract repo names from GitHub trending HTML: look for h2 class="h3 lh-condensed"
240    let mut results = Vec::new();
241    let mut search = raw;
242    while let Some(start) = search.find("h2 class=\"h3 lh-condensed\"") {
243        search = &search[start + 26..];
244        if let Some(link_start) = search.find("<a href=\"/") {
245            let after = &search[link_start + 9..];
246            if let Some(end) = after.find('"') {
247                let repo_path = after[..end].to_string();
248                // The description is in a <p> tag nearby
249                let desc = if let Some(p_start) = search.find("<p ") {
250                    let p_content = &search[p_start..];
251                    if let Some(close) = p_content.find("</p>") {
252                        let inner = &p_content[..close];
253                        strip_html_basic(inner).trim().chars().take(100).collect()
254                    } else {
255                        String::new()
256                    }
257                } else {
258                    String::new()
259                };
260                results.push((repo_path, desc));
261                if results.len() >= 10 {
262                    break;
263                }
264            }
265        }
266    }
267    results
268}
269
270fn parse_hn_algolia_json(raw: &str) -> Vec<(String, String)> {
271    let Ok(v) = serde_json::from_str::<serde_json::Value>(raw) else {
272        return Vec::new();
273    };
274    let hits = v["hits"].as_array().cloned().unwrap_or_default();
275    hits.iter()
276        .take(10)
277        .filter_map(|hit| {
278            let title = hit["title"].as_str()?.to_string();
279            let url = hit["url"].as_str().unwrap_or("").to_string();
280            Some((title, url))
281        })
282        .collect()
283}
284
/// Return the trimmed, entity-decoded text of the first `<tag ...>...</tag>`
/// element in `text`, or `None` when no complete element is found.
///
/// * The opening tag must be named exactly `tag`: the name has to be followed
///   by `>` or whitespace, so looking for `title` does not match `<titles>`.
/// * `<![CDATA[` / `]]>` markers are removed.
/// * The predefined entities `&lt;`, `&gt;`, `&quot;`, `&apos;`, the numeric
///   `&#39;` and finally `&amp;` are decoded. `&amp;` goes last so that an
///   escaped entity such as `&amp;lt;` decodes once, to `&lt;`, rather than
///   twice, to `<`.
///
/// This is a deliberately small scanner, not an XML parser: nested elements
/// of the same name and self-closing tags are not handled.
fn extract_xml_tag(text: &str, tag: &str) -> Option<String> {
    let open = format!("<{}", tag);
    let close = format!("</{}>", tag);

    // Find the first opening tag whose name ends right after `tag`.
    let mut search_from = 0;
    let start = loop {
        let candidate = text[search_from..].find(&open)? + search_from;
        let name_end = candidate + open.len();
        match text[name_end..].chars().next() {
            Some(c) if c == '>' || c.is_whitespace() => break candidate,
            // Longer tag name (e.g. `<titles`): keep looking after it.
            Some(_) => search_from = name_end,
            None => return None,
        }
    };

    let content_start = text[start..].find('>')? + start + 1;
    let end = text[content_start..].find(&close)? + content_start;
    let inner = &text[content_start..end];

    let decoded = inner
        .replace("<![CDATA[", "")
        .replace("]]>", "")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&apos;", "'")
        // Must stay last; see the function documentation.
        .replace("&amp;", "&");
    Some(decoded.trim().to_string())
}
303
/// Remove everything between `<` and `>` (inclusive) and return the rest.
///
/// This is a plain character scan with no HTML parsing: a `<` opens a tag, a
/// `>` closes it, and both delimiters are always dropped, even a stray `>`
/// outside any tag. Text after an unclosed `<` is discarded.
fn strip_html_basic(html: &str) -> String {
    let mut inside_tag = false;
    let mut text = String::with_capacity(html.len());
    for c in html.chars() {
        if c == '<' {
            inside_tag = true;
        } else if c == '>' {
            inside_tag = false;
        } else if !inside_tag {
            text.push(c);
        }
    }
    text
}
317
318// ─── LLM rule extraction ──────────────────────────────────────────────────────
319
320/// Extract planning rules from article content using LLM.
321async fn extract_rules_from_content<L: EvolutionLlm>(
322    articles: &[(String, String)],
323    domains: &[String],
324    existing_summary: &str,
325    llm: &L,
326    model: &str,
327) -> Result<Vec<PlanningRule>> {
328    if articles.is_empty() {
329        return Ok(Vec::new());
330    }
331
332    // Build article content block (titles + snippets)
333    let article_content = articles
334        .iter()
335        .enumerate()
336        .map(|(i, (title, snippet))| {
337            if snippet.is_empty() {
338                format!("{}. {}", i + 1, title)
339            } else {
340                format!("{}. {}\n   {}", i + 1, title, snippet)
341            }
342        })
343        .collect::<Vec<_>>()
344        .join("\n");
345
346    let domains_str = domains.join(", ");
347
348    let prompt = EXTERNAL_KNOWLEDGE_PROMPT
349        .replace("{{domains}}", &domains_str)
350        .replace("{{article_content}}", &article_content)
351        .replace("{{existing_rules_summary}}", existing_summary);
352
353    let messages = vec![EvolutionMessage::user(&prompt)];
354    let content = llm
355        .complete(&messages, model, 0.3)
356        .await?
357        .trim()
358        .to_string();
359
360    if content.is_empty() {
361        return Ok(Vec::new());
362    }
363
364    parse_external_rule_response(&content)
365}
366
367fn parse_external_rule_response(content: &str) -> Result<Vec<PlanningRule>> {
368    // Strip markdown code fences if present
369    let json_str = extract_json_array(content);
370
371    let arr: Vec<serde_json::Value> = serde_json::from_str(&json_str).map_err(|e| {
372        anyhow::anyhow!(
373            "Failed to parse external rule JSON: {}: raw={:.200}",
374            e,
375            content
376        )
377    })?;
378
379    let mut rules = Vec::new();
380    for val in arr {
381        let id = val["id"].as_str().unwrap_or("").to_string();
382        if id.is_empty() || !id.starts_with("ext_") {
383            tracing::warn!("External rule rejected: id '{}' must start with 'ext_'", id);
384            continue;
385        }
386        let instruction = val["instruction"].as_str().unwrap_or("").to_string();
387        if instruction.is_empty() || instruction.len() > 200 {
388            continue;
389        }
390        // L3 content safety check
391        if let Err(e) = gatekeeper_l3_content(&instruction) {
392            tracing::warn!("L3 rejected external rule {}: {}", id, e);
393            continue;
394        }
395        let priority = val["priority"].as_u64().unwrap_or(50).clamp(45, 55) as u32;
396        let keywords: Vec<String> = val["keywords"]
397            .as_array()
398            .map(|a| {
399                a.iter()
400                    .filter_map(|v| v.as_str().map(String::from))
401                    .collect()
402            })
403            .unwrap_or_default();
404        let context_keywords: Vec<String> = val["context_keywords"]
405            .as_array()
406            .map(|a| {
407                a.iter()
408                    .filter_map(|v| v.as_str().map(String::from))
409                    .collect()
410            })
411            .unwrap_or_default();
412        let tool_hint = val["tool_hint"]
413            .as_str()
414            .filter(|s| !s.is_empty() && *s != "null")
415            .map(String::from);
416
417        rules.push(PlanningRule {
418            id,
419            priority,
420            keywords,
421            context_keywords,
422            tool_hint,
423            instruction,
424            mutable: true,
425            origin: "external".to_string(),
426            reusable: false,
427            effectiveness: None,
428            trigger_count: None,
429        });
430    }
431
432    Ok(rules)
433}
434
/// Reduce an LLM reply to the text of its JSON array.
///
/// Surrounding whitespace and Markdown code fences (```` ```json ```` or
/// ```` ``` ````) are stripped. If the remainder contains a `[` that is
/// followed, anywhere later, by a `]`, the slice from the first `[` to the
/// last `]` is returned; otherwise the stripped text is returned unchanged so
/// the caller's JSON parser reports the problem.
///
/// The bracket order is checked explicitly: a reply such as `"] oops ["` has
/// its last `]` before its first `[`, and slicing with those indices would
/// panic.
fn extract_json_array(content: &str) -> String {
    let stripped = content
        .trim()
        .trim_start_matches("```json")
        .trim_start_matches("```")
        .trim_end_matches("```")
        .trim();

    match (stripped.find('['), stripped.rfind(']')) {
        (Some(start), Some(end)) if start < end => stripped[start..=end].to_string(),
        _ => stripped.to_string(),
    }
}
450
451// ─── Source registry evolution ───────────────────────────────────────────────
452
453/// Apply pause/retire logic to sources based on accessibility + quality scores.
454fn evolve_sources(sources: &mut [SourceEntry]) -> Vec<(String, String)> {
455    let mut changes = Vec::new();
456    for source in sources.iter_mut() {
457        // Only pause/retire mutable sources (seed sources can't be retired, only paused)
458        let total_fetches = source.fetch_success_count + source.fetch_fail_count;
459
460        // Pause: accessibility too low and fail count high enough
461        if source.enabled
462            && source.accessibility_score < PAUSE_ACCESSIBILITY_THRESHOLD
463            && source.fetch_fail_count >= PAUSE_MIN_FAIL_COUNT
464        {
465            source.enabled = false;
466            tracing::info!(
467                "Pausing source {} (accessibility={:.2}, fails={})",
468                source.id,
469                source.accessibility_score,
470                source.fetch_fail_count
471            );
472            changes.push(("source_paused".to_string(), source.id.clone()));
473        }
474
475        // Retire (mutable only): quality too low, no rules contributed, many fetches
476        if source.mutable
477            && source.quality_score < RETIRE_QUALITY_THRESHOLD
478            && source.rules_contributed == 0
479            && total_fetches >= RETIRE_MIN_FETCHES
480        {
481            source.enabled = false;
482            tracing::info!(
483                "Retiring source {} (quality={:.2})",
484                source.id,
485                source.quality_score
486            );
487            changes.push(("source_retired".to_string(), source.id.clone()));
488        }
489    }
490    changes
491}
492
493// ─── Persistence helpers ──────────────────────────────────────────────────────
494
495/// Save source registry atomically to prompts/sources.json.
496fn save_sources(chat_root: &Path, registry: &SourceRegistry) -> Result<()> {
497    let path = chat_root.join("prompts").join("sources.json");
498    if let Some(parent) = path.parent() {
499        std::fs::create_dir_all(parent)?;
500    }
501    let json = serde_json::to_string_pretty(registry)?;
502    atomic_write(&path, &json)?;
503    Ok(())
504}
505
506/// Merge new external rules into existing rules.json, skipping duplicates.
507fn merge_external_rules(
508    chat_root: &Path,
509    new_rules: Vec<PlanningRule>,
510) -> Result<Vec<(String, String)>> {
511    if new_rules.is_empty() {
512        return Ok(Vec::new());
513    }
514
515    let rules_path = chat_root.join("prompts").join("rules.json");
516    let mut existing: Vec<PlanningRule> = if rules_path.exists() {
517        std::fs::read_to_string(&rules_path)
518            .ok()
519            .and_then(|s| serde_json::from_str(&s).ok())
520            .unwrap_or_default()
521    } else {
522        Vec::new()
523    };
524
525    let mut changes = Vec::new();
526    // External rules share the 50-rule cap with internal evolved rules
527    let available_slots = 50_usize.saturating_sub(existing.len());
528    for rule in new_rules.into_iter().take(available_slots) {
529        if existing.iter().any(|r| r.id == rule.id) {
530            continue;
531        }
532        changes.push(("external_rule_added".to_string(), rule.id.clone()));
533        existing.push(rule);
534    }
535
536    if !changes.is_empty() {
537        let json = serde_json::to_string_pretty(&existing)?;
538        atomic_write(&rules_path, &json)?;
539    }
540
541    Ok(changes)
542}
543
544// ─── Priority promotion ────────────────────────────────────────────────────────
545
546/// Promote external rules with effectiveness ≥ 0.7 to priority 65.
547/// Called from feedback.rs integration point (see promote_external_rules).
548pub fn apply_external_rule_promotions(
549    chat_root: &Path,
550    promotions: &[String], // rule IDs to promote
551) -> Result<Vec<(String, String)>> {
552    if promotions.is_empty() {
553        return Ok(Vec::new());
554    }
555    let rules_path = chat_root.join("prompts").join("rules.json");
556    if !rules_path.exists() {
557        return Ok(Vec::new());
558    }
559    let mut rules: Vec<PlanningRule> =
560        serde_json::from_str(&std::fs::read_to_string(&rules_path)?)?;
561    let mut changes = Vec::new();
562    for rule in rules.iter_mut() {
563        if promotions.contains(&rule.id) && rule.origin == "external" && rule.priority < 65 {
564            rule.priority = 65;
565            changes.push(("external_rule_promoted".to_string(), rule.id.clone()));
566        }
567    }
568    if !changes.is_empty() {
569        let json = serde_json::to_string_pretty(&rules)?;
570        atomic_write(&rules_path, &json)?;
571    }
572    Ok(changes)
573}
574
575// ─── Main entry point ─────────────────────────────────────────────────────────
576
577/// Run external learning cycle. Returns (change_type, id) pairs for the changelog.
578///
579/// Gated by `SKILLLITE_EXTERNAL_LEARNING=1`. If not enabled, returns Ok(empty).
580/// Opens its own SQLite connection so the future is `Send`.
581pub async fn run_external_learning<L: EvolutionLlm>(
582    chat_root: &Path,
583    llm: &L,
584    model: &str,
585    txn_id: &str,
586) -> Result<Vec<(String, String)>> {
587    // Phase 1: sync DB check (drop before any await)
588    let should_run = {
589        let conn = open_evolution_db(chat_root)?;
590
591        should_run_external_learning(&conn) // conn dropped here
592    };
593    if !should_run {
594        return Ok(Vec::new());
595    }
596
597    tracing::info!("EVO-6: Starting external learning run (txn={})", txn_id);
598
599    // Load sources and existing rules (sync, no await)
600    let mut registry = seed::load_sources(chat_root);
601    let existing_rules = seed::load_rules(chat_root);
602    let existing_summary = existing_rules
603        .iter()
604        .map(|r| format!("- {}: {}", r.id, r.instruction))
605        .collect::<Vec<_>>()
606        .join("\n");
607
608    let prioritized = prioritize_sources(&registry.sources);
609    let to_fetch: Vec<SourceEntry> = prioritized
610        .into_iter()
611        .take(MAX_FETCHES_PER_RUN)
612        .cloned()
613        .collect();
614
615    let mut all_changes: Vec<(String, String)> = Vec::new();
616    let mut source_update_map: Vec<(String, bool, u32)> = Vec::new(); // (id, success, rules_added)
617
618    // Phase 2: async fetch + LLM calls (no Connection held)
619    for source in &to_fetch {
620        tracing::debug!("EVO-6: Fetching source {} ({})", source.id, source.url);
621
622        let fetch_result = fetch_source(source).await;
623        let (success, raw) = match fetch_result {
624            Ok(content) if !content.is_empty() => (true, content),
625            Ok(_) => {
626                tracing::warn!("EVO-6: Empty response from {}", source.id);
627                (false, String::new())
628            }
629            Err(e) => {
630                tracing::warn!("EVO-6: Fetch failed for {}: {}", source.id, e);
631                (false, String::new())
632            }
633        };
634
635        if !success || raw.is_empty() {
636            source_update_map.push((source.id.clone(), false, 0));
637            continue;
638        }
639
640        // Parse content
641        let articles = parse_content(source, &raw);
642        if articles.is_empty() {
643            tracing::debug!("EVO-6: No articles parsed from {}", source.id);
644            source_update_map.push((source.id.clone(), true, 0));
645            continue;
646        }
647
648        // LLM rule extraction
649        let new_rules = match extract_rules_from_content(
650            &articles,
651            &source.domains,
652            &existing_summary,
653            llm,
654            model,
655        )
656        .await
657        {
658            Ok(rules) => rules,
659            Err(e) => {
660                tracing::warn!("EVO-6: Rule extraction failed for {}: {}", source.id, e);
661                Vec::new()
662            }
663        };
664
665        tracing::info!(
666            "EVO-6: Source {} → {} articles → {} candidate rules",
667            source.id,
668            articles.len(),
669            new_rules.len()
670        );
671
672        // Merge rules into rules.json
673        let rule_changes = merge_external_rules(chat_root, new_rules)?;
674        let rules_added = rule_changes.len() as u32;
675        all_changes.extend(rule_changes);
676        source_update_map.push((source.id.clone(), true, rules_added));
677    }
678
679    // Phase 3: update registry and apply source evolution (sync)
680    for (id, success, rules_added) in &source_update_map {
681        if let Some(src) = registry.sources.iter_mut().find(|s| s.id == *id) {
682            update_accessibility(src, *success);
683            src.rules_contributed += rules_added;
684        }
685    }
686
687    // Phase 3+4: one conn for promote check + logging
688    let conn = open_evolution_db(chat_root)?;
689    let _promoted: Vec<PlanningRule> = Vec::new(); // Temporarily disabled
690    let promotion_changes: Vec<(String, String)> = Vec::new(); // Temporarily disabled
691    all_changes.extend(promotion_changes);
692
693    let source_changes = evolve_sources(&mut registry.sources);
694    all_changes.extend(source_changes);
695
696    save_sources(chat_root, &registry)?;
697
698    // Log the run and each change with the same conn
699    log_evolution_event(
700        &conn,
701        chat_root,
702        "external_fetch_run",
703        "",
704        &format!(
705            "{} sources fetched, {} changes",
706            to_fetch.len(),
707            all_changes.len()
708        ),
709        txn_id,
710    )?;
711    for (ctype, cid) in &all_changes {
712        log_evolution_event(&conn, chat_root, ctype, cid, "external learning", txn_id)?;
713    }
714
715    tracing::info!(
716        "EVO-6: External learning complete — {} changes",
717        all_changes.len()
718    );
719    Ok(all_changes)
720}
721
#[cfg(test)]
mod tests {
    use super::*;
    use crate::feedback;
    use skilllite_core::planning::{SourceEntry, SourceRegistry};

    /// Fixture: an enabled, mutable RSS source with fresh counters and the
    /// given region and scores.
    fn make_source(id: &str, region: &str, accessibility: f32, quality: f32) -> SourceEntry {
        SourceEntry {
            id: id.to_owned(),
            name: id.to_owned(),
            url: format!("https://example.com/{}", id),
            source_type: String::from("rss"),
            parser: String::from("rss_generic"),
            region: region.to_owned(),
            language: String::from("zh"),
            domains: vec![String::from("programming")],
            quality_score: quality,
            accessibility_score: accessibility,
            rules_contributed: 0,
            fetch_success_count: 0,
            fetch_fail_count: 0,
            last_fetched: None,
            mutable: true,
            origin: String::from("seed"),
            enabled: true,
        }
    }

    /// Fixture: a reachable but low-quality source with 35 recorded fetches
    /// and no contributed rules.
    fn low_quality_source(id: &str, mutable: bool) -> SourceEntry {
        SourceEntry {
            fetch_success_count: 25,
            fetch_fail_count: 10,
            rules_contributed: 0,
            mutable,
            ..make_source(id, "cn", 0.9, 0.10)
        }
    }

    #[test]
    fn test_prioritize_sources_cn_first() {
        let registry = SourceRegistry {
            version: 1,
            sources: vec![
                make_source("global_a", "global", 0.9, 0.9),
                make_source("cn_b", "cn", 0.5, 0.5),
                make_source("cn_a", "cn", 0.9, 0.9),
            ],
        };

        let ordered = prioritize_sources(&registry.sources);

        let regions: Vec<&str> = ordered.iter().map(|s| s.region.as_str()).collect();
        assert_eq!(regions, ["cn", "cn", "global"]);
        // Among CN: higher score first
        assert_eq!(ordered[0].id, "cn_a");
    }

    #[test]
    fn test_update_accessibility_ema() {
        let mut source = make_source("test", "cn", 0.8, 0.8);

        update_accessibility(&mut source, true);
        let after_success = 0.3 * 1.0 + 0.7 * 0.8;
        assert!((source.accessibility_score - after_success).abs() < 1e-5);
        assert_eq!(source.fetch_success_count, 1);

        update_accessibility(&mut source, false);
        let after_failure = 0.3 * 0.0 + 0.7 * after_success;
        assert!((source.accessibility_score - after_failure).abs() < 1e-5);
        assert_eq!(source.fetch_fail_count, 1);
    }

    #[test]
    fn test_evolve_sources_pause_low_accessibility() {
        let mut unreachable = make_source("low_access", "cn", 0.10, 0.70);
        unreachable.fetch_fail_count = 8;
        let mut sources = vec![unreachable];

        let changes = evolve_sources(&mut sources);

        assert!(!sources[0].enabled, "source should be paused");
        assert!(changes.iter().any(|(kind, _)| kind == "source_paused"));
    }

    #[test]
    fn test_evolve_sources_retire_mutable() {
        let mut sources = vec![low_quality_source("low_quality", true)];

        let changes = evolve_sources(&mut sources);

        assert!(!sources[0].enabled, "source should be retired");
        assert!(changes.iter().any(|(kind, _)| kind == "source_retired"));
    }

    #[test]
    fn test_evolve_sources_no_retire_immutable() {
        // seed = immutable
        let mut sources = vec![low_quality_source("seed_src", false)];

        let changes = evolve_sources(&mut sources);

        // Should NOT be retired (only paused if accessibility is low)
        assert!(!changes.iter().any(|(kind, _)| kind == "source_retired"));
    }

    #[test]
    fn test_extract_json_array_with_fences() {
        let fenced = "```json\n[{\"id\": \"ext_test\"}]\n```";

        let extracted = extract_json_array(fenced);

        assert!(extracted.contains("ext_test"));
        let parsed: Vec<serde_json::Value> = serde_json::from_str(&extracted)
            .expect("extract_json_array output should be valid JSON");
        assert_eq!(parsed.len(), 1);
    }

    #[test]
    fn test_parse_external_rule_response_valid() {
        let reply = r#"[{"id":"ext_prefer_logging","priority":50,"keywords":["log","debug"],"context_keywords":[],"tool_hint":null,"instruction":"Always add structured logging before running commands."}]"#;

        let rules =
            parse_external_rule_response(reply).expect("valid external rule JSON should parse");

        assert_eq!(rules.len(), 1);
        let rule = &rules[0];
        assert_eq!(rule.id, "ext_prefer_logging");
        assert_eq!(rule.origin, "external");
        assert!(rule.mutable);
        assert_eq!(rule.priority, 50);
    }

    #[test]
    fn test_parse_external_rule_response_bad_id_rejected() {
        // Rule ID doesn't start with ext_ — should be rejected
        let reply = r#"[{"id":"bad_rule","priority":50,"keywords":["log"],"context_keywords":[],"tool_hint":null,"instruction":"Some instruction."}]"#;

        let rules = parse_external_rule_response(reply)
            .expect("parse should succeed (empty rules for bad id)");

        assert!(rules.is_empty(), "non-ext_ id should be rejected");
    }

    #[test]
    fn test_parse_rss_basic() {
        let feed = r#"<?xml version="1.0"?>
<rss><channel>
<item><title>Test Article</title><description>Some content here</description></item>
<item><title>Another Article</title><description>More content</description></item>
</channel></rss>"#;

        let articles = parse_rss(feed);

        assert_eq!(articles.len(), 2);
        assert_eq!(articles[0].0, "Test Article");
    }

    #[test]
    fn test_strip_html_basic() {
        assert_eq!(strip_html_basic("<p>Hello <b>world</b>!</p>"), "Hello world!");
    }

    #[test]
    fn test_should_run_env_disabled_by_default() {
        // Without SKILLLITE_EXTERNAL_LEARNING=1, should return false
        std::env::remove_var("SKILLLITE_EXTERNAL_LEARNING");

        let conn = Connection::open_in_memory().expect("in-memory DB should open");
        conn.execute_batch("PRAGMA foreign_keys=ON;")
            .expect("PRAGMA should succeed");
        feedback::ensure_evolution_tables(&conn).expect("tables should be created");

        assert!(!should_run_external_learning(&conn));
    }

    #[test]
    fn test_merge_external_rules_no_duplicates() {
        let workspace = tempfile::TempDir::new().expect("temp dir should be created");
        let chat_root = workspace.path();
        seed::ensure_seed_data(chat_root);

        let candidate = PlanningRule {
            id: "ext_test_rule".to_string(),
            priority: 50,
            keywords: vec!["test".to_string()],
            context_keywords: vec![],
            tool_hint: None,
            instruction: "Test external rule.".to_string(),
            mutable: true,
            origin: "external".to_string(),
            reusable: false,
            effectiveness: None,
            trigger_count: None,
        };

        // First merge: should add the rule
        let first = merge_external_rules(chat_root, vec![candidate.clone()])
            .expect("first merge should succeed");
        assert_eq!(first.len(), 1);
        assert_eq!(first[0].0, "external_rule_added");

        // Second merge: duplicate — should not add again
        let second = merge_external_rules(chat_root, vec![candidate])
            .expect("second merge should succeed (no new rules)");
        assert!(
            second.is_empty(),
            "duplicate rule should not be added again"
        );
    }
}