kumiho-construct 2026.5.11

Construct — memory-native AI agent runtime powered by Kumiho
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
//! LLM-driven skill rewrite (kumiho-versioned, step 6e).
//!
//! Step 5 of the self-improving agent plan — closes the feedback loop.
//! [`crate::skills::EffectivenessCache::improvement_candidates`] surfaces
//! skills whose rolling success rate has regressed; this module takes a
//! candidate, asks an LLM to propose revised **markdown content** (not a
//! full SKILL.toml — see step 6e), validates the response, writes it to
//! a fresh `contents/r<N+1>-<timestamp>.md` via [`SkillImprover`], and
//! publishes it as a new Kumiho revision tagged `published` via
//! [`publish_skill_revision`].
//!
//! Gated behind `#[cfg(feature = "skill-creation")]` to mirror
//! [`SkillImprover`]'s own gate.
//!
//! ## Why we ask the LLM for markdown, not TOML
//!
//! From step 6e onwards, `SKILL.toml` is the **identity** of a skill
//! (name, version, kref pointer, tools).  The agent reads its prompts
//! from the markdown file referenced by `[skill].content_file`.  When
//! the rolling success rate regresses we want to evolve the *prompts*,
//! not the metadata — so the LLM is asked to revise the markdown body.
//! The new revision file lives next to its predecessors under
//! `contents/`, and the published kref retags onto it via
//! `publish_skill_revision`.  The previous file is preserved on disk
//! and in Kumiho so step 6f can roll back to it if the new revision
//! itself regresses.
//!
//! ## Flow
//!
//!   1. Cooldown check via `SkillImprover::should_improve_skill`.
//!   2. Load the manifest; bail if `[skill].content_file` is unset
//!      (legacy skill that hasn't been migrated yet — daemon-startup
//!      registration will fix this on next run).
//!   3. Read the current content file.  Skip silently if missing.
//!   4. Call `provider.chat` with a focused system + user prompt that
//!      includes the current markdown, the regression statistics, and
//!      a strict response format ("only the revised markdown inside a
//!      ```markdown fence").
//!   5. Extract the first ```markdown fence from the response.  Return
//!      early if not present.
//!   6. Hand the new content + reason to `SkillImprover::improve_skill`,
//!      which writes the new file under `contents/` and arms the
//!      cooldown.
//!   7. Hand the new file path to `publish_skill_revision`, which
//!      creates the Kumiho revision + artifact + retags `published`
//!      and updates SKILL.toml's `content_file`.
//!
//! Test coverage focuses on the pure helpers — fence extraction and
//! prompt composition.  The end-to-end async path is covered by the
//! daemon integration that consumes this module.

use anyhow::{Context, Result, anyhow};
use std::path::Path;
use std::sync::Arc;

use crate::gateway::kumiho_client::KumihoClient;
use crate::providers::traits::{ChatMessage, ChatRequest, Provider};
use crate::skills::SkillManifest;
use crate::skills::effectiveness_cache::SkillImprovementCandidate;
use crate::skills::improver::SkillImprover;
use crate::skills::registration::publish_skill_revision;

/// All the pieces an auto-improvement attempt needs from the daemon.  The
/// daemon constructs one of these once at startup and threads it into
/// the background task that consumes [`SkillImprovementCandidate`]s.
///
/// [`attempt_skill_improvement`] only ever borrows this context; every
/// field is read, none is mutated.
pub struct AutoImproveContext {
    /// Construct workspace root.  A skill's directory is resolved as
    /// `<workspace>/skills/<slug>/`, with its manifest at
    /// `<workspace>/skills/<slug>/SKILL.toml`.
    pub workspace_dir: std::path::PathBuf,
    /// LLM provider whose `chat` method is called for the rewrite
    /// proposal.
    pub provider: Arc<dyn Provider>,
    /// Model name passed through unchanged to `provider.chat`.
    pub model: String,
    /// Sampling temperature passed through to `provider.chat`.  Keep it
    /// conservative — these rewrites should be deterministic-ish.  See
    /// [`DEFAULT_REWRITE_TEMPERATURE`] for the suggested value.
    pub temperature: f64,
    /// Kumiho client handed to `publish_skill_revision` to publish the
    /// new revision.  Shared with the effectiveness refresh task so a
    /// single daemon talks to one Kumiho process.
    pub kumiho_client: Arc<KumihoClient>,
    /// Memory project name from `config.kumiho.memory_project`.  Used
    /// by `publish_skill_revision` to verify the manifest's kref is
    /// addressed in the right project before creating a revision.
    pub memory_project: String,
}

/// Default temperature for skill rewrite LLM calls.  Low so the proposed
/// content stays close to the original structure rather than wandering.
///
/// Nothing in this module applies it automatically: callers building an
/// [`AutoImproveContext`] choose whether to use it for `temperature`.
pub const DEFAULT_REWRITE_TEMPERATURE: f64 = 0.3;

/// Outcome of a successful [`attempt_skill_improvement`] call, i.e. one
/// where a new revision was both written and published.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SkillImprovementOutcome {
    /// Slug of the improved skill (copied from the candidate's
    /// `skill_name`).
    pub slug: String,
    /// Kref of the freshly published revision, as reported by
    /// `publish_skill_revision`.  Future-proofing for step 6f's
    /// per-revision outcome scoring.
    pub revision_kref: String,
    /// Relative `content_file` path SKILL.toml now points at, as
    /// reported by `publish_skill_revision`.
    pub content_file: String,
}

/// Attempt to improve a single regressed skill.
///
/// The steps run strictly in this order: cooldown check, read
/// `SKILL.toml`, read the current content file, ask the LLM for a
/// rewrite, extract the ```markdown fence from the reply, write the new
/// content via `improver.improve_skill`, publish it via
/// `publish_skill_revision`.
///
/// Returns:
/// - `Ok(Some(outcome))` — improvement was generated, validated,
///   written, and published as a fresh Kumiho revision.  `improver`'s
///   cooldown is now armed for this skill.
/// - `Ok(None)` — skipped: cooldown not expired, `SKILL.toml` not found,
///   `content_file` unset, content file not found, LLM response without
///   a usable ```markdown fence, or `improve_skill` itself declining
///   (returning `None`).  This function writes nothing on these paths,
///   although the LLM call may already have been made.
/// - `Err(_)` — fatal error: a read failure other than "not found", a
///   manifest that does not parse, a failed LLM call, or a failure in
///   `improve_skill` / `publish_skill_revision`.  Note that a publish
///   failure is reported *after* `improve_skill` has already run, so
///   its effects are not undone here.  Caller should log and continue
///   with the next candidate.
pub async fn attempt_skill_improvement(
    ctx: &AutoImproveContext,
    candidate: &SkillImprovementCandidate,
    improver: &mut SkillImprover,
) -> Result<Option<SkillImprovementOutcome>> {
    // Cheap gate first: no I/O and no LLM spend while the skill is
    // still cooling down.
    if !improver.should_improve_skill(&candidate.skill_name) {
        return Ok(None);
    }

    let skill_dir = ctx.workspace_dir.join("skills").join(&candidate.skill_name);
    let manifest_path = skill_dir.join("SKILL.toml");

    // Manifest not found → skip silently.  The cache may be tracking
    // outcomes for a skill that lives somewhere else; we can't safely
    // rewrite it.  Any other read error (permissions, invalid UTF-8,
    // ...) is propagated as a fatal `Err`.
    let manifest_text = match tokio::fs::read_to_string(&manifest_path).await {
        Ok(s) => s,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            tracing::debug!(
                skill = %candidate.skill_name,
                path = %manifest_path.display(),
                "auto-improve: SKILL.toml not found on disk; skipping",
            );
            return Ok(None);
        }
        Err(e) => {
            return Err(anyhow!(e).context(format!(
                "auto-improve: failed to read {}",
                manifest_path.display()
            )));
        }
    };

    // A manifest that exists but does not parse is an error, not a skip.
    let manifest: SkillManifest = toml::from_str(&manifest_text)
        .with_context(|| format!("auto-improve: parsing {}", manifest_path.display()))?;
    // No `content_file` means there is no markdown body to revise yet.
    let Some(content_rel) = manifest.skill.content_file.clone() else {
        tracing::debug!(
            skill = %candidate.skill_name,
            "auto-improve: skill has no content_file; daemon-startup will migrate it on next run",
        );
        return Ok(None);
    };

    // `content_file` is resolved relative to the skill directory.  Same
    // policy as the manifest: not found → skip, other errors → `Err`.
    let content_path = skill_dir.join(&content_rel);
    let current_content = match tokio::fs::read_to_string(&content_path).await {
        Ok(s) => s,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            tracing::debug!(
                skill = %candidate.skill_name,
                path = %content_path.display(),
                "auto-improve: content_file not found on disk; skipping",
            );
            return Ok(None);
        }
        Err(e) => {
            return Err(anyhow!(e).context(format!(
                "auto-improve: failed to read {}",
                content_path.display()
            )));
        }
    };

    let prompt = build_improvement_prompt(&current_content, candidate);

    // The system message pins the response format; the user message
    // carries the regression statistics and the current markdown.
    let messages = [
        ChatMessage::system(
            "You revise the markdown prompts that guide an agent through a skill so they avoid the failure patterns the user reports. \
             Return ONLY the complete revised markdown body inside a ```markdown ... ``` fenced code block. \
             Preserve the original structure and headings; tighten or expand sections as needed. \
             Do not include explanation outside the fence.",
        ),
        ChatMessage::user(prompt),
    ];

    let response = ctx
        .provider
        .chat(
            ChatRequest {
                messages: &messages,
                tools: None,
            },
            &ctx.model,
            ctx.temperature,
        )
        .await
        .context("auto-improve: LLM chat call failed")?;

    // A reply without a usable ```markdown fence is treated as a skip
    // (logged at warn level), not as an error.
    let new_content = match extract_markdown_fence(response.text_or_empty()) {
        Some(s) => s,
        None => {
            tracing::warn!(
                skill = %candidate.skill_name,
                rate = candidate.rate,
                "auto-improve: LLM response missing ```markdown fence; skipping",
            );
            return Ok(None);
        }
    };

    // Human-readable justification, passed to both `improve_skill` and
    // `publish_skill_revision`.
    let reason = format!(
        "auto-improve: rolling success {:.0}% over last {} outcomes",
        candidate.rate * 100.0,
        candidate.total
    );

    // Write the new content file (cooldown-aware).  Returns None when
    // the cooldown is still active — improver.should_improve_skill
    // already filtered that case, but the inner check is a defence in
    // depth in case a long LLM call raced past it.
    let new_file = match improver
        .improve_skill(&candidate.skill_name, &new_content, &reason)
        .await?
    {
        Some(p) => p,
        None => return Ok(None),
    };

    // Publish through Kumiho: create_revision + create_artifact +
    // tag_revision("published") + sync_published_content_path.
    // A failure here surfaces as `Err` after `improve_skill` has
    // already completed; nothing below rolls that step back.
    let published = publish_skill_revision(
        &skill_dir,
        &new_file,
        &reason,
        &ctx.kumiho_client,
        &ctx.memory_project,
    )
    .await
    .with_context(|| {
        format!(
            "auto-improve: publish_skill_revision for {}",
            candidate.skill_name
        )
    })?;

    Ok(Some(SkillImprovementOutcome {
        slug: candidate.skill_name.clone(),
        revision_kref: published.revision_kref,
        content_file: published.new_content_file,
    }))
}

/// Compose the user-role message sent to the LLM for a rewrite.
///
/// The message states the candidate's rolling success rate (as a whole
/// percentage), the number of outcomes it was measured over, and the
/// healthy-skill threshold, lists what the revision should focus on,
/// repeats the strict "markdown fence only" response format, and ends
/// with `current_skill_content` (trailing whitespace removed) wrapped
/// in a ```markdown fence.
///
/// Pure function — no I/O — so tests can inspect the prompt without
/// spinning up an LLM.
pub fn build_improvement_prompt(
    current_skill_content: &str,
    candidate: &SkillImprovementCandidate,
) -> String {
    // Name every interpolated value up front so the template below can
    // capture them directly.
    let rate_pct = candidate.rate * 100.0;
    let total = candidate.total;
    let threshold_pct =
        crate::skills::effectiveness_cache::DEFAULT_IMPROVEMENT_THRESHOLD * 100.0;
    let current = current_skill_content.trim_end();

    format!(
        "The skill below has regressed.  Recent rolling success rate is \
         **{rate_pct:.0}%** over **{total}** outcomes — well below our \
         {threshold_pct:.0}% threshold for healthy skills.\n\n\
         Please propose an improved markdown body that addresses common \
         failure modes.  Focus on:\n\
         - clearer step-by-step instructions,\n\
         - explicit handling of edge cases the original may have missed,\n\
         - safer defaults and tighter guard rails,\n\
         - preserving headings and overall structure so the next agent \
         can find the same sections.\n\n\
         Return ONLY the complete revised markdown content inside a \
         ```markdown fenced block — no commentary outside the fence.\n\n\
         ## Current skill content\n\n\
         ```markdown\n\
         {current}\n\
         ```\n"
    )
}

/// Extract the contents of the first ```markdown ... ``` fenced code
/// block in `text`.
///
/// The opener must carry the explicit `markdown` language tag so an
/// unrelated block is not picked up; a longer tag that merely starts
/// with `markdown` (for example `markdownlint`) does not count.
///
/// Code fences nested inside the block are kept as content: a column-0
/// fence line with a language tag opens a nested block, the next bare
/// fence line closes it, and only a bare fence line outside any nested
/// block ends the extraction.  A nested fence written *without* a tag
/// is indistinguishable from the closer and still ends the block.
///
/// Returns `None` if no markdown fence is found, if the block is never
/// closed, or if its body is empty.  Otherwise returns the contents
/// trimmed of leading newlines and trailing whitespace.
pub fn extract_markdown_fence(text: &str) -> Option<String> {
    extract_fenced_block(text, "markdown")
}

/// Extract the body of the first fenced block in `text` whose language
/// tag is exactly `lang`.  See [`extract_markdown_fence`] for the
/// nesting and trimming rules.
fn extract_fenced_block(text: &str, lang: &str) -> Option<String> {
    let opener = format!("```{lang}");

    // Find the first opener whose tag ends right after `lang`.  A plain
    // prefix match would also accept tags such as `{lang}lint`, so keep
    // searching past those.
    let mut search_from = 0;
    let after_open = loop {
        let start = search_from + text[search_from..].find(&opener)?;
        let rest = &text[start + opener.len()..];
        let tag_continues =
            rest.starts_with(|c: char| c.is_alphanumeric() || c == '_' || c == '-');
        if !tag_continues {
            break rest;
        }
        search_from = start + opener.len();
    };

    // Skip whatever is left of the opener line (e.g. trailing spaces).
    // When the opener is not followed by a newline the body starts
    // immediately after the tag.
    let body_start = after_open.find('\n').map_or(0, |i| i + 1);
    let body = &after_open[body_start..];

    let close = find_closing_fence(body)?;
    let inner = &body[..close];

    // Drop blank lines at both ends, then any other trailing whitespace;
    // leading spaces on the first content line are preserved.
    let content = inner
        .trim_matches(|c: char| c == '\n' || c == '\r')
        .trim_end();
    if content.is_empty() {
        None
    } else {
        Some(content.to_string())
    }
}

/// Return the byte offset in `body` at which the enclosing fence closes.
///
/// Scans line by line, looking only at lines that start (column 0) with
/// three or more backticks.  Tagged fence lines open a nested block and
/// bare ones close it; the first bare fence line outside a nested block
/// is the closer.  If `body` contains no fence line at all, falls back
/// to the first inline run of three backticks so a closer glued to the
/// end of a content line is still honoured.
fn find_closing_fence(body: &str) -> Option<usize> {
    // Backtick count of the currently open nested fence, if any.
    let mut nested_ticks: Option<usize> = None;
    let mut saw_fence_line = false;
    let mut offset = 0;

    for line in body.split_inclusive('\n') {
        let line_start = offset;
        offset += line.len();

        let ticks = line.bytes().take_while(|&b| b == b'`').count();
        if ticks < 3 {
            continue;
        }
        let info = line[ticks..].trim();
        if info.contains('`') {
            // Backticks after the run mean an inline code span that
            // happens to start the line, not a fence.
            continue;
        }
        saw_fence_line = true;

        match nested_ticks {
            Some(open_ticks) => {
                // Inside a nested block only a bare fence at least as
                // long as its opener closes it; anything else is content.
                if info.is_empty() && ticks >= open_ticks {
                    nested_ticks = None;
                }
            }
            None if info.is_empty() => return Some(line_start),
            None => nested_ticks = Some(ticks),
        }
    }

    if saw_fence_line {
        // Fence lines were present but the outer block never closed:
        // report "no block" rather than hand back a truncated body.
        None
    } else {
        body.find("```")
    }
}

/// Compose the on-disk location of a skill's manifest:
/// `<workspace_dir>/skills/<skill_name>/SKILL.toml`.
///
/// This is pure path arithmetic — nothing is read from the filesystem
/// and nothing is validated.  Pair it with [`skill_is_writable`] to
/// check that the manifest exists before spending an LLM call.
pub fn skill_toml_path(workspace_dir: &Path, skill_name: &str) -> std::path::PathBuf {
    let mut manifest = workspace_dir.to_path_buf();
    manifest.push("skills");
    manifest.push(skill_name);
    manifest.push("SKILL.toml");
    manifest
}

/// Cheap pre-flight check for callers that want to bail out before the
/// expensive work when a skill is not present in the workspace.
///
/// Performs a single filesystem existence probe for the skill's
/// `SKILL.toml`.  Returns `true` only when the probe reports that the
/// file exists; a missing file and a failed probe both yield `false`.
/// Despite the name, write permissions are not inspected.
pub async fn skill_is_writable(workspace_dir: &Path, skill_name: &str) -> bool {
    let manifest = skill_toml_path(workspace_dir, skill_name);
    matches!(tokio::fs::try_exists(manifest).await, Ok(true))
}

// ---------------------------------------------------------------------------
// Tests — focus on the pure helpers (extract_markdown_fence + build_improvement_prompt).
// The end-to-end async path is exercised by the daemon integration.
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a regression candidate in tests.
    fn cand(name: &str, rate: f64, total: u32) -> SkillImprovementCandidate {
        SkillImprovementCandidate {
            skill_name: name.to_owned(),
            rate,
            total,
        }
    }

    // ── extract_markdown_fence ─────────────────────────────────────

    /// A fence preceded by chatter is found and its body returned.
    #[test]
    fn extract_markdown_fence_basic() {
        let reply = [
            "Here is the rewrite:",
            "```markdown",
            "# Heading",
            "Body text.",
            "```",
            "",
        ]
        .join("\n");
        let body = extract_markdown_fence(&reply).expect("fence present");
        assert!(body.contains("# Heading"));
        assert!(body.contains("Body text."));
    }

    /// Spaces after the language tag on the opener line are ignored.
    #[test]
    fn extract_markdown_fence_handles_trailing_whitespace_on_opener() {
        let reply = "```markdown   \nbody\n```";
        let body = extract_markdown_fence(reply).expect("fence present");
        assert_eq!(body, "body");
    }

    /// No fence, or a fence with a different language tag, yields `None`.
    #[test]
    fn extract_markdown_fence_returns_none_when_missing() {
        assert!(extract_markdown_fence("no fence here").is_none());
        // Different language tag — must not match.
        for other in ["```toml\nx = 1\n```", "```python\nprint('x')\n```"] {
            assert!(extract_markdown_fence(other).is_none());
        }
    }

    /// A fence with nothing inside counts as "no content".
    #[test]
    fn extract_markdown_fence_returns_none_when_empty_body() {
        assert!(extract_markdown_fence("```markdown\n```").is_none());
    }

    /// Only the first of several markdown blocks is returned.
    #[test]
    fn extract_markdown_fence_first_block_only() {
        let reply = [
            "```markdown",
            "first",
            "```",
            "",
            "```markdown",
            "second",
            "```",
        ]
        .join("\n");
        let body = extract_markdown_fence(&reply).expect("fence present");
        assert!(body.contains("first"));
        assert!(!body.contains("second"));
    }

    /// Blank lines between the opener and the content are dropped.
    #[test]
    fn extract_markdown_fence_strips_leading_newlines() {
        let body = extract_markdown_fence("```markdown\n\n\nbody\n```").expect("fence present");
        assert!(body.starts_with("body"));
    }

    // ── build_improvement_prompt ───────────────────────────────────

    /// Rate, sample size and threshold all appear in the prompt.
    #[test]
    fn build_improvement_prompt_includes_stats() {
        let candidate = cand("x", 0.25, 40);
        let prompt = build_improvement_prompt("# my-skill\n\nDo the thing.\n", &candidate);
        assert!(prompt.contains("**25%**"), "rate not in prompt: {prompt}");
        assert!(prompt.contains("**40** outcomes"));
        assert!(prompt.contains("40%"));
    }

    /// The current skill body is embedded verbatim inside a fence.
    #[test]
    fn build_improvement_prompt_includes_current_skill() {
        let candidate = cand("sentinel-skill", 0.1, 20);
        let prompt = build_improvement_prompt(
            "# sentinel-skill\n\nVersion 0.4.2 instructions.\n",
            &candidate,
        );
        assert!(prompt.contains("sentinel-skill"));
        assert!(prompt.contains("0.4.2 instructions"));
        // Wrapped in a ```markdown fence so the LLM sees a clean block.
        assert!(prompt.contains("```markdown"));
    }

    /// The strict response-format instructions are part of the prompt.
    #[test]
    fn build_improvement_prompt_keeps_response_format_strict() {
        let candidate = cand("x", 0.2, 15);
        let prompt = build_improvement_prompt("# x\n", &candidate);
        for needle in ["Return ONLY", "markdown fenced block"] {
            assert!(prompt.contains(needle));
        }
    }

    // ── skill_toml_path ────────────────────────────────────────────

    /// The manifest path ends in `skills/<slug>/SKILL.toml`.
    #[test]
    fn skill_toml_path_composes_correctly() {
        let manifest = skill_toml_path(Path::new("/tmp/ws"), "my-skill");
        assert!(manifest.ends_with("skills/my-skill/SKILL.toml"));
    }
}