1use serde_json::{json, Value};
2use std::time::Duration;
3
4use crate::embedding::EmbeddingProvider;
5use crate::errors::{InnateError, Result};
6use crate::refine::{DistillProvenance, DistilledChunk, Distiller};
7use crate::settings::{EmbeddingConfig, LlmConfig};
8
/// Version tag of the distillation prompt; reported as `prompt_version` in the
/// `DistillProvenance` returned by `HttpDistiller::provenance`.
// NOTE(review): presumably this should be bumped whenever the prompt text in
// `build_distill_prompt` changes — confirm with the consumers of the provenance data.
const DISTILL_PROMPT_VERSION: &str = "3";
14
15fn safe_prompt_field(value: Option<&str>) -> String {
16 let value = value.unwrap_or("");
17 let (cleaned, action) = crate::utils::sanitize(value);
18 match action {
19 crate::utils::SanitizeAction::Discard => "[removed unsafe content]".to_string(),
20 _ => cleaned,
21 }
22}
23
24fn build_distill_prompt(log: &Value) -> String {
25 let query = safe_prompt_field(log.get("query").and_then(Value::as_str));
26 let output = safe_prompt_field(log.get("output").and_then(Value::as_str));
27 let output_summary = safe_prompt_field(log.get("output_summary").and_then(Value::as_str));
28 let nomination = safe_prompt_field(log.get("nomination").and_then(Value::as_str));
29 let outcome = safe_prompt_field(log.get("outcome").and_then(Value::as_str));
30
31 let mut context_parts = vec![];
32 if !query.is_empty() {
33 context_parts.push(format!("Query: {query}"));
34 }
35 if !nomination.is_empty() {
36 context_parts.push(format!("Nominated insight: {nomination}"));
37 }
38 if !output_summary.is_empty() {
39 context_parts.push(format!("Summary: {output_summary}"));
40 }
41 if !output.is_empty() {
42 let truncated: String = output.chars().take(1500).collect();
43 context_parts.push(format!("Output (truncated): {truncated}"));
44 }
45 if !outcome.is_empty() {
46 context_parts.push(format!("Outcome: {outcome}"));
47 }
48
49 let context = context_parts.join("\n");
50
51 format!(
52 r#"You are a knowledge distillation assistant. Given an agent interaction log, \
53extract zero or more independent reusable procedural principles. Favor GENERAL, \
54transferable skills, methods, and techniques over project-specific facts.
55
56Agent interaction:
57{context}
58
59Output a JSON array. Each item has:
60{{
61 "content": "<principle; when it applies; what to avoid>",
62 "trigger_desc": "<2-6 word canonical phrase>",
63 "anti_trigger_desc": "<when NOT to apply this, or null>"
64}}
65Return [] if nothing is worth keeping.
66
67Rules:
68- content must be self-contained and actionable for a future agent reading cold
69- Prefer transferable methods and techniques; a principle that helps across many
70 projects is worth far more than one tied to this codebase
71- Abstract away project-specific detail: strip repo/file/function/path/variable names
72 and one-off identifiers, and rephrase the lesson as a general principle whoever the
73 next project is. Keep concrete project-specific detail ONLY when the lesson genuinely
74 cannot be generalized without losing its meaning
75- trigger_desc must match the vocabulary a future agent would use in a search query;
76 prefer general, technology- or domain-level phrasing over project-name phrasing
77- Never store conversation text verbatim; always distil to reusable principle form
78- If outcome is "fail", focus on what to avoid
79- Keep principles independent; do not combine unrelated lessons"#
80 )
81}
82
83fn build_distill_prompt_with_related(log: &Value, logs: &[Value]) -> String {
84 let mut prompt = build_distill_prompt(log);
85 let log_id = log.get("id").and_then(Value::as_str).unwrap_or("");
86 let context_key = log.get("context_key").and_then(Value::as_str);
87 let related: Vec<String> = logs
88 .iter()
89 .filter(|other| other.get("id").and_then(Value::as_str).unwrap_or("") != log_id)
90 .filter(|other| {
91 context_key.is_some() && other.get("context_key").and_then(Value::as_str) == context_key
92 })
93 .take(4)
94 .map(|other| {
95 let query = safe_prompt_field(other.get("query").and_then(Value::as_str));
96 let summary = safe_prompt_field(other.get("output_summary").and_then(Value::as_str));
97 let outcome = safe_prompt_field(other.get("outcome").and_then(Value::as_str));
98 format!("- Query: {query}; outcome: {outcome}; summary: {summary}")
99 })
100 .collect();
101 if !related.is_empty() {
102 prompt.push_str(
103 "\n\nRelated recent interactions (use only to identify repeated patterns or conflicts):\n",
104 );
105 prompt.push_str(&related.join("\n"));
106 }
107 prompt
108}
109
/// Upper bound on the number of requests `post_json_retry` sends for one call
/// (the first try counts as attempt 1).
const HTTP_MAX_ATTEMPTS: u32 = 3;
/// Value passed to `timeout_global` for every individual request.
const HTTP_TIMEOUT: Duration = Duration::from_secs(30);
118
119fn post_json_retry(
124 url: &str,
125 headers: &[(&str, &str)],
126 body: &Value,
127 label: &str,
128) -> Result<Value> {
129 let start = std::time::Instant::now();
133 let mut attempt = 0;
134 let outcome: Result<Value> = loop {
135 attempt += 1;
136 let mut req = ureq::post(url)
141 .config()
142 .timeout_global(Some(HTTP_TIMEOUT))
143 .http_status_as_error(false)
144 .build()
145 .header("Content-Type", "application/json");
146 for (k, v) in headers {
147 req = req.header(*k, *v);
148 }
149 match req.send_json(body) {
150 Ok(mut response) => {
151 let code = response.status().as_u16();
152 if (200..300).contains(&code) {
153 break response
154 .body_mut()
155 .read_json::<Value>()
156 .map_err(|e| {
157 InnateError::Other(format!("{label} response parse error: {e}"))
158 });
159 }
160 let retry_after = response
161 .headers()
162 .get("retry-after")
163 .and_then(|h| h.to_str().ok())
164 .and_then(|s| s.trim().parse::<u64>().ok());
165 if status_is_retryable(code) && attempt < HTTP_MAX_ATTEMPTS {
166 std::thread::sleep(backoff_delay(attempt, retry_after));
167 continue;
168 }
169 let detail = response.body_mut().read_to_string().unwrap_or_default();
172 break Err(InnateError::Other(format!(
173 "{label} HTTP error: status: {code} {detail}"
174 )));
175 }
176 Err(err) => {
177 if attempt < HTTP_MAX_ATTEMPTS {
178 std::thread::sleep(backoff_delay(attempt, None));
179 continue;
180 }
181 break Err(InnateError::Other(format!(
183 "{label} HTTP error: transport: {err}"
184 )));
185 }
186 }
187 };
188 crate::llm_trace::record(label, url, body, &outcome, attempt, start.elapsed());
189 outcome
190}
191
/// Tells whether an HTTP status code is worth retrying: `429` or any code in
/// the `500..=599` range.
fn status_is_retryable(code: u16) -> bool {
    matches!(code, 429 | 500..=599)
}
196
/// Computes how long to wait before the next retry.
///
/// * `attempt` – 1-based number of the attempt that just failed.
/// * `retry_after_secs` – delay requested by the server, in seconds, if any.
///
/// A server-requested delay takes precedence and is capped at 30 seconds.
/// Otherwise the delay starts at 250 ms and doubles with each attempt
/// (250, 500, 1000, …); the exponent is capped at 6, i.e. 16 s at most.
/// `attempt == 0` is treated like the first attempt rather than underflowing.
fn backoff_delay(attempt: u32, retry_after_secs: Option<u64>) -> Duration {
    const BASE_DELAY_MS: u64 = 250;
    const MAX_SHIFT: u32 = 6;
    const MAX_RETRY_AFTER_SECS: u64 = 30;

    if let Some(secs) = retry_after_secs {
        return Duration::from_secs(secs.min(MAX_RETRY_AFTER_SECS));
    }
    // `saturating_sub` keeps an (unexpected) attempt of 0 from panicking in
    // debug builds / wrapping in release builds.
    let shift = attempt.saturating_sub(1).min(MAX_SHIFT);
    Duration::from_millis(BASE_DELAY_MS << shift)
}
206
207pub struct HttpDistiller {
215 config: LlmConfig,
216}
217
218impl HttpDistiller {
219 pub fn new(config: LlmConfig) -> Self {
220 Self { config }
221 }
222
223 fn call(&self, prompt: &str) -> Result<String> {
224 if self.config.provider == "anthropic" {
225 self.call_anthropic(prompt)
226 } else {
227 self.call_openai(prompt)
228 }
229 }
230
231 fn call_openai(&self, prompt: &str) -> Result<String> {
232 let api_key = self
233 .config
234 .resolved_api_key()
235 .ok_or_else(|| InnateError::Other("LLM API key not configured".into()))?;
236
237 let base = self.config.resolved_base_url();
238 let url = format!("{base}/chat/completions");
239
240 let body = json!({
241 "model": self.config.model_id,
242 "messages": [{"role": "user", "content": prompt}],
243 "max_tokens": 800,
244 "temperature": 0.2,
245 });
246
247 let auth = format!("Bearer {api_key}");
248 let resp_json = post_json_retry(&url, &[("Authorization", &auth)], &body, "LLM")?;
249
250 resp_json
251 .pointer("/choices/0/message/content")
252 .and_then(Value::as_str)
253 .map(str::to_string)
254 .ok_or_else(|| InnateError::Other("unexpected LLM response shape".into()))
255 }
256
257 fn call_anthropic(&self, prompt: &str) -> Result<String> {
258 let api_key = self
259 .config
260 .resolved_api_key()
261 .ok_or_else(|| InnateError::Other("Anthropic API key not configured".into()))?;
262
263 let base = self.config.resolved_base_url();
264 let url = format!("{base}/v1/messages");
265
266 let body = json!({
267 "model": self.config.model_id,
268 "max_tokens": 800,
269 "messages": [{"role": "user", "content": prompt}],
270 });
271
272 let resp_json = post_json_retry(
273 &url,
274 &[("x-api-key", &api_key), ("anthropic-version", "2023-06-01")],
275 &body,
276 "Anthropic",
277 )?;
278
279 resp_json
280 .pointer("/content/0/text")
281 .and_then(Value::as_str)
282 .map(str::to_string)
283 .ok_or_else(|| InnateError::Other("unexpected Anthropic response shape".into()))
284 }
285}
286
287impl Distiller for HttpDistiller {
288 fn distill(&self, log_entries: &[Value]) -> crate::errors::Result<Vec<DistilledChunk>> {
289 distill_with(log_entries, |prompt| self.call(prompt))
290 }
291
292 fn distill_with_context(
293 &self,
294 primary: &Value,
295 related_logs: &[Value],
296 ) -> crate::errors::Result<Vec<DistilledChunk>> {
297 distill_entry_with(primary, related_logs, |prompt| self.call(prompt))
298 }
299
300 fn provenance(&self) -> DistillProvenance {
301 DistillProvenance {
302 provider: Some(self.config.provider.clone()),
303 model: Some(self.config.model_id.clone()),
304 prompt_version: Some(DISTILL_PROMPT_VERSION.to_string()),
305 }
306 }
307}
308
309fn distill_with(
314 log_entries: &[Value],
315 call: impl Fn(&str) -> Result<String> + Copy,
316) -> Result<Vec<DistilledChunk>> {
317 let mut out = Vec::new();
318 for entry in log_entries {
319 out.extend(distill_entry_with(entry, log_entries, call)?);
320 }
321 Ok(out)
322}
323
324fn distill_entry_with(
325 entry: &Value,
326 related_logs: &[Value],
327 call: impl Fn(&str) -> Result<String>,
328) -> Result<Vec<DistilledChunk>> {
329 let log_id = entry["id"].as_str().unwrap_or("").to_string();
330 let prompt = build_distill_prompt_with_related(entry, related_logs);
331 let mut raw = call(&prompt)?;
332 let mut parsed = parse_distill_response(&raw);
333 if parsed.is_err() {
334 raw = call(&format!(
335 "{prompt}\n\nYour previous response was invalid. Return only a valid JSON array."
336 ))?;
337 parsed = parse_distill_response(&raw);
338 }
339 let items = parsed.map_err(|error| {
340 InnateError::Other(format!("LLM distillation response invalid: {error}"))
341 })?;
342 let mut out = Vec::new();
343 for parsed in items {
344 let content = parsed
345 .get("content")
346 .and_then(Value::as_str)
347 .map(str::trim)
348 .filter(|s| !s.is_empty());
349 let Some(content) = content else { continue };
350 let trigger_desc = parsed
351 .get("trigger_desc")
352 .and_then(Value::as_str)
353 .map(str::to_string)
354 .filter(|s| !s.is_empty());
355 let anti_trigger_desc = parsed
356 .get("anti_trigger_desc")
357 .and_then(Value::as_str)
358 .map(str::to_string)
359 .filter(|s| !s.is_empty() && s.to_lowercase() != "null");
360 out.push(DistilledChunk {
361 content: content.to_string(),
362 trigger_desc,
363 anti_trigger_desc,
364 source_log_id: log_id.clone(),
365 nomination: entry
366 .get("nomination")
367 .and_then(Value::as_str)
368 .map(str::to_string),
369 });
370 }
371 Ok(out)
372}
373
374fn parse_distill_response(raw: &str) -> std::result::Result<Vec<Value>, String> {
375 let json_str = extract_json(raw);
376 let parsed: Value = serde_json::from_str(json_str.trim()).map_err(|e| e.to_string())?;
377 if parsed.get("skip").and_then(Value::as_bool) == Some(true) {
378 return Ok(vec![]);
379 }
380 match parsed {
381 Value::Array(items) => Ok(items),
382 Value::Object(_) => Ok(vec![parsed]),
383 _ => Err("expected a JSON object or array".to_string()),
384 }
385}
386
/// Extracts the most likely JSON payload from a raw model reply.
///
/// Strategies, tried in order on the trimmed reply:
/// 1. the reply starts with a Markdown code fence (three backticks, optionally
///    followed by `json`) and contains a closing fence: the trimmed text
///    between the two fences;
/// 2. the span from the first `[` to the last `]`;
/// 3. the span from the first `{` to the last `}`;
/// 4. the trimmed reply itself.
///
/// A bracket or brace pair is only used when the closing delimiter comes after
/// the opening one; replies such as `"] ["` fall through to the next strategy
/// instead of panicking on an inverted slice range.
fn extract_json(text: &str) -> &str {
    /// Returns the slice from the first `open` to the last `close` (inclusive),
    /// or `None` when either is missing or they appear in the wrong order.
    fn delimited(text: &str, open: char, close: char) -> Option<&str> {
        let start = text.find(open)?;
        let end = text.rfind(close)?;
        (start < end).then(|| &text[start..end + close.len_utf8()])
    }

    let stripped = text.trim();

    let fenced = stripped
        .strip_prefix("```json")
        .or_else(|| stripped.strip_prefix("```"));
    if let Some(inner) = fenced {
        if let Some(end) = inner.rfind("```") {
            return inner[..end].trim();
        }
    }

    // Arrays are tried first on purpose: an array wrapped in prose or in an
    // outer object is still picked up as the list of items.
    delimited(stripped, '[', ']')
        .or_else(|| delimited(stripped, '{', '}'))
        .unwrap_or(stripped)
}
407
408pub fn build_distiller(config: &LlmConfig) -> std::sync::Arc<dyn Distiller + Send + Sync> {
413 std::sync::Arc::new(HttpDistiller::new(config.clone()))
414}
415
/// Embedding provider that obtains vectors by POSTing text to the
/// `{base}/embeddings` endpoint described by its configuration.
pub struct LlmEmbeddingProvider {
    /// Supplies the API key, base URL, model id and expected vector dimension.
    config: EmbeddingConfig,
}
423
// Unit tests for the helpers in this file. Where a model reply is needed, the
// LLM call is replaced by a closure, so no HTTP request is issued by these tests.
#[cfg(test)]
// Further items are defined after this module in the file, which is what the lint flags.
#[allow(clippy::items_after_test_module)]
mod tests {
    use std::cell::Cell;

    use serde_json::json;

    use std::time::Duration;

    use super::{
        backoff_delay, build_distill_prompt, distill_entry_with, distill_with,
        parse_distill_response, parse_embedding_response, status_is_retryable,
    };

    // A well-formed response parses; every malformed variant must be an error.
    #[test]
    fn embedding_response_is_parsed_fail_closed() {
        let resp = json!({"data": [{"embedding": [0.1, 0.2, 0.3]}]});
        // Happy path: three numbers, three expected.
        assert_eq!(parse_embedding_response(&resp, 3).unwrap(), vec![0.1f32, 0.2, 0.3]);

        // Dimension mismatch.
        assert!(parse_embedding_response(&resp, 4).is_err());

        // Non-numeric element inside the vector.
        let bad = json!({"data": [{"embedding": [0.1, "oops", 0.3]}]});
        assert!(parse_embedding_response(&bad, 3).is_err());

        // `data` is present but holds no entry.
        let shape = json!({"data": []});
        assert!(parse_embedding_response(&shape, 3).is_err());
    }

    // 429 and the 5xx range are retried; other 4xx codes and 200 are not.
    #[test]
    fn only_rate_limit_and_5xx_are_retryable() {
        assert!(status_is_retryable(429));
        assert!(status_is_retryable(500));
        assert!(status_is_retryable(503));
        assert!(status_is_retryable(599));
        assert!(!status_is_retryable(400));
        assert!(!status_is_retryable(401));
        assert!(!status_is_retryable(404));
        assert!(!status_is_retryable(200));
    }

    // Delay doubles per attempt; a Retry-After value wins but is capped at 30 s.
    #[test]
    fn backoff_is_exponential_and_honors_retry_after() {
        assert_eq!(backoff_delay(1, None), Duration::from_millis(250));
        assert_eq!(backoff_delay(2, None), Duration::from_millis(500));
        assert_eq!(backoff_delay(3, None), Duration::from_millis(1000));
        assert_eq!(backoff_delay(1, Some(5)), Duration::from_secs(5));
        assert_eq!(backoff_delay(1, Some(120)), Duration::from_secs(30));
    }

    // Secret-looking substrings must not reach the prompt; the assertions
    // expect the sanitizer to leave a `[REDACTED]` marker in their place.
    #[test]
    fn prompt_redacts_secrets_before_external_llm_call() {
        let prompt = build_distill_prompt(&json!({
            "query": "debug sk-12345678901234567890",
            "output_summary": "Authorization: Bearer secret-token-value"
        }));
        assert!(!prompt.contains("sk-12345678901234567890"));
        assert!(!prompt.contains("secret-token-value"));
        assert!(prompt.contains("[REDACTED]"));
    }

    // An unparseable first reply triggers exactly one more call to the model.
    #[test]
    fn malformed_response_is_retried_instead_of_silently_skipped() {
        let calls = Cell::new(0);
        let chunks = distill_with(&[json!({"id": "log-1", "query": "q"})], |_| {
            calls.set(calls.get() + 1);
            if calls.get() == 1 {
                Ok("not json".to_string())
            } else {
                Ok(r#"[{"content":"retry worked","trigger_desc":"retry"}]"#.to_string())
            }
        })
        .unwrap();
        assert_eq!(calls.get(), 2);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].content, "retry worked");
    }

    // A JSON array reply yields one item per element.
    #[test]
    fn parser_accepts_multiple_distilled_chunks() {
        let parsed = parse_distill_response(
            r#"[{"content":"one"},{"content":"two","anti_trigger_desc":"never"}]"#,
        )
        .unwrap();
        assert_eq!(parsed.len(), 2);
    }

    // The nomination is shown to the model (it appears in the prompt) and is
    // carried on the chunk, while the chunk content comes from the model reply.
    #[test]
    fn nomination_is_distilled_instead_of_bypassing_the_model() {
        let prompt_seen = Cell::new(false);
        let entry = json!({
            "id": "log-1",
            "query": "original query",
            "nomination": "raw agent nomination",
            "output_summary": "summary",
            "outcome": "ok"
        });
        let chunks = distill_entry_with(&entry, std::slice::from_ref(&entry), |prompt| {
            prompt_seen.set(prompt.contains("raw agent nomination"));
            Ok(
                r#"[{"content":"generalized principle","trigger_desc":"generalize","anti_trigger_desc":null}]"#
                    .to_string(),
            )
        })
        .unwrap();

        assert!(prompt_seen.get());
        assert_eq!(chunks[0].content, "generalized principle");
        assert_eq!(
            chunks[0].nomination.as_deref(),
            Some("raw agent nomination")
        );
    }
}
543
544impl LlmEmbeddingProvider {
545 pub fn new(config: EmbeddingConfig) -> Self {
546 Self { config }
547 }
548
549 fn embed(&self, text: &str) -> Result<Vec<f32>> {
550 let api_key = self
551 .config
552 .resolved_api_key()
553 .ok_or_else(|| InnateError::Other("Embedding API key not configured".into()))?;
554
555 let base = self.config.resolved_base_url();
556 let url = format!("{base}/embeddings");
557
558 let body = json!({
559 "input": text,
560 "model": self.config.model_id,
561 });
562
563 let auth = format!("Bearer {api_key}");
564 let resp_json = post_json_retry(
565 &url,
566 &[("Authorization", &auth)],
567 &body,
568 "Embedding",
569 )?;
570
571 parse_embedding_response(&resp_json, self.config.dim)
572 }
573}
574
575fn parse_embedding_response(resp_json: &Value, expected_dim: usize) -> Result<Vec<f32>> {
581 let embedding = resp_json
582 .pointer("/data/0/embedding")
583 .and_then(Value::as_array)
584 .ok_or_else(|| InnateError::Other("unexpected embedding response shape".into()))?;
585 let vec: Vec<f32> = embedding
586 .iter()
587 .map(|v| {
588 v.as_f64().map(|x| x as f32).ok_or_else(|| {
589 InnateError::Other("embedding response contains a non-numeric element".into())
590 })
591 })
592 .collect::<Result<Vec<f32>>>()?;
593 if vec.len() != expected_dim {
594 return Err(InnateError::Other(format!(
595 "embedding dimension mismatch: provider returned {}, expected {expected_dim} (check embedding.dim)",
596 vec.len(),
597 )));
598 }
599 Ok(vec)
600}
601
602impl EmbeddingProvider for LlmEmbeddingProvider {
603 fn model_name(&self) -> &'static str {
604 "llm-embedding"
605 }
606
607 fn content_dim(&self) -> usize {
608 self.config.dim
609 }
610
611 fn trigger_dim(&self) -> usize {
612 self.config.dim
613 }
614
615 fn embed_content(&self, text: &str) -> Result<Vec<f32>> {
616 self.embed(text)
617 }
618
619 fn embed_trigger(&self, text: &str) -> Result<Vec<f32>> {
620 self.embed(text)
621 }
622
623 fn embed_both(&self, text: &str) -> Result<(Vec<f32>, Vec<f32>)> {
626 let v = self.embed(text)?;
627 Ok((v.clone(), v))
628 }
629}
630
631pub fn test_llm(config: &LlmConfig) -> Result<String> {
637 let distiller = build_distiller(config);
638 let dummy_log = json!({
639 "id": "test",
640 "query": "connection test",
641 "output_summary": "test",
642 "outcome": "ok"
643 });
644 distiller.distill(&[dummy_log])?;
646 Ok(format!("OK — model: {}", config.model_id))
647}
648
649pub fn test_embedding(config: &EmbeddingConfig) -> Result<usize> {
651 let provider = LlmEmbeddingProvider::new(config.clone());
652 let vec = provider.embed("connection test")?;
653 Ok(vec.len())
654}