1use sha2::{Digest, Sha256};
2use uuid::Uuid;
3
4use crate::errors::CoreError;
5use crate::models::{AddExampleInput, RememberRuleInput, SkillRecord};
6use crate::observability::privacy::{redact_secretish_tokens, strip_private_tagged_regions};
7
8use super::{SkillRow, add_example, count_captures_today};
9
10#[derive(Debug, Clone)]
11pub struct RememberOutcome {
12 pub skill: SkillRecord,
13 pub deduped: bool,
17 pub dedup_window_hit: bool,
23 pub confidence_after: f64,
26 pub captures_today: i64,
33}
34
/// Width, in milliseconds, of the look-back window used when searching for a
/// conversation-origin skill with the same content hash.
pub const REMEMBER_DEDUP_WINDOW_MS: i64 = 30 * 1_000;

/// Upper bound applied to confidence scores assigned or raised by this module.
pub const REMEMBER_CONVERSATION_CONFIDENCE_CAP: f64 = 0.70;
/// Maximum number of `char`s accepted for a rule body.
pub const REMEMBER_BODY_CHAR_LIMIT: usize = 16 * 1024;
/// Maximum number of `char`s accepted for each of `bad_code` / `good_code`.
pub const REMEMBER_EXAMPLE_CHAR_LIMIT: usize = 16 * 1024;
/// Maximum number of entries accepted in `file_patterns`.
pub const REMEMBER_FILE_PATTERN_LIMIT: usize = 32;
/// Maximum number of `char`s accepted for a single `file_patterns` entry.
pub const REMEMBER_FILE_PATTERN_CHAR_LIMIT: usize = 256;
49
50fn sanitize_remember_text(input: &str) -> String {
51 redact_secretish_tokens(&strip_private_tagged_regions(input))
52}
53
/// Builds the canonical comma-joined form of a file-pattern list.
///
/// Each pattern is trimmed, empty entries are dropped, and the remainder is
/// de-duplicated and sorted (byte-wise string order) before being joined with `,`.
/// `None` and lists with no usable entries both yield an empty string.
fn canonical_file_patterns_csv(patterns: Option<&[String]>) -> String {
    // A BTreeSet gives us "sorted and unique" in one step.
    let unique_sorted: std::collections::BTreeSet<&str> = match patterns {
        Some(list) => list
            .iter()
            .map(|pattern| pattern.trim())
            .filter(|pattern| !pattern.is_empty())
            .collect(),
        None => return String::new(),
    };
    unique_sorted.into_iter().collect::<Vec<_>>().join(",")
}
68
69fn parse_existing_file_patterns_csv(raw: Option<&str>) -> String {
70 let Some(raw) = raw.map(str::trim).filter(|raw| !raw.is_empty()) else {
71 return String::new();
72 };
73 serde_json::from_str::<Vec<String>>(raw)
74 .map(|patterns| canonical_file_patterns_csv(Some(&patterns)))
75 .unwrap_or_default()
76}
77
/// Normalises text for duplicate comparison.
///
/// Runs of whitespace collapse to a single space, leading/trailing whitespace is
/// removed, and ASCII letters are lower-cased (non-ASCII characters are untouched).
fn normalise_dedup_text(value: &str) -> String {
    let mut normalised = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first word.
        if !normalised.is_empty() {
            normalised.push(' ');
        }
        normalised.push_str(word);
    }
    normalised.make_ascii_lowercase();
    normalised
}
85
86fn remember_bodies_semantically_match(incoming: &str, existing: &str) -> bool {
87 let incoming = normalise_dedup_text(incoming);
88 let existing = normalise_dedup_text(existing);
89 if incoming.is_empty() || existing.is_empty() {
90 return false;
91 }
92 if incoming == existing {
93 return true;
94 }
95
96 let incoming_terms: std::collections::HashSet<&str> = incoming
97 .split(|c: char| !c.is_ascii_alphanumeric() && c != '_')
98 .filter(|term| term.len() >= 4)
99 .collect();
100 let existing_terms: std::collections::HashSet<&str> = existing
101 .split(|c: char| !c.is_ascii_alphanumeric() && c != '_')
102 .filter(|term| term.len() >= 4)
103 .collect();
104 if incoming_terms.len().min(existing_terms.len()) < 4 {
105 return false;
106 }
107 let overlap = incoming_terms.intersection(&existing_terms).count();
108 let union = incoming_terms.union(&existing_terms).count();
109 union > 0 && (overlap as f64 / union as f64) >= 0.72
110}
111
112pub(crate) fn remember_content_hash(file_patterns_csv: &str, title: &str, body: &str) -> String {
123 let mut hasher = Sha256::new();
124 hasher.update(file_patterns_csv.as_bytes());
125 hasher.update(b"\n");
126 hasher.update(title.as_bytes());
127 hasher.update(b"\n");
128 hasher.update(body.as_bytes());
129 let digest = hasher.finalize();
130 let mut hex = String::with_capacity(digest.len() * 2);
131 for byte in digest {
132 hex.push_str(&format!("{byte:02x}"));
133 }
134 hex
135}
136
/// Capture-count threshold for warnings.
///
/// NOTE(review): not referenced in this part of the file; presumably the daily
/// capture count at which callers start warning — confirm against its users.
pub const REMEMBER_WARN_THRESHOLD: i64 = 10;

/// Number of conversation-origin captures per day at which further
/// conversation-origin captures are rejected with a validation error.
pub const REMEMBER_DAILY_LIMIT: i64 = 50;
149
150async fn strengthen_existing_remember_rule(
151 db: &sqlx::SqlitePool,
152 skill_id: &str,
153 now: &str,
154 reason: &str,
155) -> crate::Result<f64> {
156 let before: f64 = sqlx::query_scalar!(
157 "SELECT confidence_score FROM skills WHERE id = ?1",
158 skill_id
159 )
160 .fetch_one(db)
161 .await?;
162 let after = (before + 0.05).min(REMEMBER_CONVERSATION_CONFIDENCE_CAP);
163
164 sqlx::query!(
165 "UPDATE skills
166 SET confidence_score = ?1,
167 updated_at = ?2
168 WHERE id = ?3",
169 after,
170 now,
171 skill_id,
172 )
173 .execute(db)
174 .await?;
175
176 let event_id = format!("rule-event-{}", Uuid::new_v4());
177 let metadata = serde_json::json!({
178 "signal": "remember_rule_dedup",
179 "delta": 0.05,
180 })
181 .to_string();
182 sqlx::query!(
183 "INSERT INTO rule_events
184 (id, skill_id, kind, source, confidence_before, confidence_after, reason, metadata)
185 VALUES (?1, ?2, 'feedback_accept', 'remember_rule', ?3, ?4, ?5, ?6)",
186 event_id,
187 skill_id,
188 before,
189 after,
190 reason,
191 metadata,
192 )
193 .execute(db)
194 .await?;
195
196 Ok(after)
197}
198
199async fn record_engine_link_failure(
200 db: &sqlx::SqlitePool,
201 skill_id: &str,
202 engine: &str,
203 error: &std::io::Error,
204) {
205 let event_id = format!("rule-event-{}", Uuid::new_v4());
206 let reason = format!("sync_engine_link failed for engine {engine}: {error}");
207 let metadata = serde_json::json!({
208 "engine": engine,
209 "enabled": true,
210 "error": error.to_string(),
211 })
212 .to_string();
213 if let Err(insert_err) = sqlx::query(
214 "INSERT INTO rule_events
215 (id, skill_id, kind, source, reason, metadata)
216 VALUES (?1, ?2, 'engine_link_failed', 'remember_rule', ?3, ?4)",
217 )
218 .bind(event_id)
219 .bind(skill_id)
220 .bind(reason)
221 .bind(metadata)
222 .execute(db)
223 .await
224 {
225 eprintln!("warning: failed to audit sync_engine_link failure: {insert_err}");
226 }
227}
228
/// Lifecycle status of a rule.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RuleStatus {
    /// Rendered as `"active"`.
    Active,
    /// Rendered as `"pending"`.
    Pending,
}

impl RuleStatus {
    /// Returns the lower-case string form of the status.
    pub const fn as_str(self) -> &'static str {
        match self {
            RuleStatus::Pending => "pending",
            RuleStatus::Active => "active",
        }
    }
}
248
249pub async fn remember_as_candidate(
255 db: &sqlx::SqlitePool,
256 input: RememberRuleInput,
257) -> crate::Result<RememberOutcome> {
258 let outcome = remember(db, input).await?;
259 if !outcome.deduped {
260 let skill_id = outcome.skill.id.as_str();
261 sqlx::query!(
262 "UPDATE skills SET status = 'pending' WHERE id = ?1",
263 skill_id
264 )
265 .execute(db)
266 .await?;
267 }
268 Ok(outcome)
269}
270
271pub async fn remember_as_candidate_with_confidence(
282 db: &sqlx::SqlitePool,
283 input: RememberRuleInput,
284 confidence: f32,
285) -> crate::Result<RememberOutcome> {
286 let outcome = remember_inner(db, input, Some(f64::from(confidence))).await?;
287 if !outcome.deduped {
288 let skill_id = outcome.skill.id.as_str();
289 sqlx::query!(
290 "UPDATE skills SET status = 'pending' WHERE id = ?1",
291 skill_id
292 )
293 .execute(db)
294 .await?;
295 }
296 Ok(outcome)
297}
298
299pub async fn remember(
300 db: &sqlx::SqlitePool,
301 input: RememberRuleInput,
302) -> crate::Result<RememberOutcome> {
303 remember_inner(db, input, None).await
304}
305
306async fn remember_inner(
313 db: &sqlx::SqlitePool,
314 input: RememberRuleInput,
315 confidence_override: Option<f64>,
316) -> crate::Result<RememberOutcome> {
317 let title_trimmed = input.title.trim();
318 if title_trimmed.is_empty() {
319 return Err(CoreError::Validation(
320 "remember_rule: title must not be empty".into(),
321 ));
322 }
323 if input.body.trim().is_empty() {
324 return Err(CoreError::Validation(
325 "remember_rule: body must not be empty".into(),
326 ));
327 }
328 if input.body.chars().count() > REMEMBER_BODY_CHAR_LIMIT {
329 return Err(CoreError::Validation(format!(
330 "remember_rule: body must be {REMEMBER_BODY_CHAR_LIMIT} chars or fewer"
331 )));
332 }
333 for (label, value) in [
334 ("bad_code", input.bad_code.as_deref()),
335 ("good_code", input.good_code.as_deref()),
336 ] {
337 if value.is_some_and(|v| v.chars().count() > REMEMBER_EXAMPLE_CHAR_LIMIT) {
338 return Err(CoreError::Validation(format!(
339 "remember_rule: {label} must be {REMEMBER_EXAMPLE_CHAR_LIMIT} chars or fewer"
340 )));
341 }
342 }
343 if let Some(patterns) = input.file_patterns.as_ref() {
344 if patterns.len() > REMEMBER_FILE_PATTERN_LIMIT {
345 return Err(CoreError::Validation(format!(
346 "remember_rule: file_patterns accepts at most {REMEMBER_FILE_PATTERN_LIMIT} entries"
347 )));
348 }
349 if patterns
350 .iter()
351 .any(|p| p.chars().count() > REMEMBER_FILE_PATTERN_CHAR_LIMIT)
352 {
353 return Err(CoreError::Validation(format!(
354 "remember_rule: file_patterns entries must be {REMEMBER_FILE_PATTERN_CHAR_LIMIT} chars or fewer"
355 )));
356 }
357 }
358 let body_sanitized = sanitize_remember_text(input.body.trim());
359 let body_trimmed = body_sanitized.trim();
360
361 let slug: String = title_trimmed
365 .to_lowercase()
366 .chars()
367 .map(|c| {
368 if c.is_ascii_alphanumeric() || c == '_' {
369 c
370 } else {
371 '-'
372 }
373 })
374 .collect::<String>()
375 .split('-')
376 .filter(|s| !s.is_empty())
377 .collect::<Vec<_>>()
378 .join("-");
379 if slug.is_empty() {
380 return Err(CoreError::Validation(
381 "remember_rule: title produces an empty slug after sanitization".into(),
382 ));
383 }
384
385 let now_utc = chrono::Utc::now();
386 let now = now_utc.format("%Y-%m-%d %H:%M:%S").to_string();
387 let origin = input
388 .origin
389 .clone()
390 .unwrap_or_else(|| "conversation".into());
391
392 if origin == "conversation" {
400 let captures_today = count_captures_today(db, &origin).await?;
401 if captures_today >= REMEMBER_DAILY_LIMIT {
402 return Err(CoreError::Validation(format!(
403 "remember_rule daily cap reached ({captures_today}/{REMEMBER_DAILY_LIMIT}). \
404 If this is intentional, import review history with `difflore import-reviews`. \
405 If an agent is looping, run `difflore status --json` to audit local memory and archive noisy entries in DiffLore Cloud."
406 )));
407 }
408 }
409
410 let file_patterns_csv = canonical_file_patterns_csv(input.file_patterns.as_deref());
416 let content_hash = remember_content_hash(&file_patterns_csv, title_trimmed, body_trimmed);
417 let now_ms: i64 = now_utc.timestamp_millis();
418 let window_start_ms = now_ms - REMEMBER_DEDUP_WINDOW_MS;
419
420 if origin != "conversation" {
424 let existing_id: Option<String> = sqlx::query_scalar(
425 "SELECT id FROM skills WHERE content_hash = ?1 \
426 ORDER BY hash_created_at ASC, id ASC LIMIT 1",
427 )
428 .bind(content_hash.as_str())
429 .fetch_optional(db)
430 .await?;
431 if let Some(existing) = existing_id {
432 let update_now = now.as_str();
433 let confidence_after = strengthen_existing_remember_rule(
434 db,
435 existing.as_str(),
436 update_now,
437 "import content-hash dedup",
438 )
439 .await?;
440 let row = sqlx::query_as!(
441 SkillRow,
442 "SELECT id, name, source, directory, version, description, type, \
443 engines, tags, trigger, check_prompt, repo_owner, repo_name, repo_branch, readme_url, \
444 enabled_for_codex, enabled_for_claude, enabled_for_gemini, enabled_for_cursor, \
445 installed_at, updated_at, origin FROM skills WHERE id = ?1",
446 existing
447 )
448 .fetch_one(db)
449 .await?;
450 let captures_today = count_captures_today(db, &origin).await?;
451 return Ok(RememberOutcome {
452 skill: SkillRecord::from(row),
453 deduped: true,
454 dedup_window_hit: false,
455 confidence_after,
456 captures_today,
457 });
458 }
459 }
460
461 let window_content_hash = content_hash.as_str();
466 let window_hit_id: Option<String> = sqlx::query_scalar(
467 "SELECT id FROM skills \
468 WHERE content_hash = ?1 \
469 AND origin = 'conversation' \
470 AND hash_created_at IS NOT NULL \
471 AND hash_created_at >= ?2 \
472 ORDER BY hash_created_at DESC, id ASC LIMIT 1",
473 )
474 .bind(window_content_hash)
475 .bind(window_start_ms)
476 .fetch_optional(db)
477 .await?;
478
479 if let Some(existing) = window_hit_id {
480 let update_now = now.as_str();
481 let confidence_after = strengthen_existing_remember_rule(
482 db,
483 existing.as_str(),
484 update_now,
485 "dedup window hit",
486 )
487 .await?;
488 let row = sqlx::query_as!(
489 SkillRow,
490 "SELECT id, name, source, directory, version, description, type, \
491 engines, tags, trigger, check_prompt, repo_owner, repo_name, repo_branch, readme_url, \
492 enabled_for_codex, enabled_for_claude, enabled_for_gemini, enabled_for_cursor, \
493 installed_at, updated_at, origin FROM skills WHERE id = ?1",
494 existing
495 )
496 .fetch_one(db)
497 .await?;
498 let captures_today = count_captures_today(db, &origin).await?;
499 return Ok(RememberOutcome {
500 skill: SkillRecord::from(row),
501 deduped: true,
502 dedup_window_hit: true,
503 confidence_after,
504 captures_today,
505 });
506 }
507
508 let id_prefix = format!("conv-{slug}-");
512 let legacy_rows = sqlx::query_as::<_, (String, String, Option<String>)>(
513 "SELECT id, description, file_patterns FROM skills \
514 WHERE id LIKE ?1 || '%' AND origin = 'conversation' \
515 ORDER BY installed_at ASC, id ASC LIMIT 10",
516 )
517 .bind(id_prefix)
518 .fetch_all(db)
519 .await?;
520 let existing_id = legacy_rows.into_iter().find_map(|row| {
521 let (id, description, file_patterns) = row;
522 let existing_patterns = parse_existing_file_patterns_csv(file_patterns.as_deref());
523 (existing_patterns == file_patterns_csv
524 && remember_bodies_semantically_match(body_trimmed, &description))
525 .then_some(id)
526 });
527
528 if let Some(existing) = existing_id {
529 let update_now = now.as_str();
530 let confidence_after =
531 strengthen_existing_remember_rule(db, existing.as_str(), update_now, "title dedup")
532 .await?;
533 let row = sqlx::query_as!(
534 SkillRow,
535 "SELECT id, name, source, directory, version, description, type, \
536 engines, tags, trigger, check_prompt, repo_owner, repo_name, repo_branch, readme_url, \
537 enabled_for_codex, enabled_for_claude, enabled_for_gemini, enabled_for_cursor, \
538 installed_at, updated_at, origin FROM skills WHERE id = ?1",
539 existing
540 )
541 .fetch_one(db)
542 .await?;
543 let captures_today = count_captures_today(db, &origin).await?;
544 return Ok(RememberOutcome {
545 skill: SkillRecord::from(row),
546 deduped: true,
547 dedup_window_hit: false,
548 confidence_after,
549 captures_today,
550 });
551 }
552
553 let id_suffix = Uuid::new_v4()
557 .to_string()
558 .chars()
559 .take(8)
560 .collect::<String>();
561 let id = format!("conv-{slug}-{id_suffix}");
562 let file_patterns_json = input
563 .file_patterns
564 .as_ref()
565 .filter(|v| !v.is_empty())
566 .map(serde_json::to_string)
567 .transpose()?;
568
569 let mut skill_md = String::new();
573 skill_md.push_str("---\n");
574 skill_md.push_str("type: review_standard\n");
575 skill_md.push_str("engines: [claude]\n");
576 skill_md.push_str(&format!("tags: [{origin}, conversation]\n"));
577 skill_md.push_str("---\n\n");
578 skill_md.push_str(&format!("# {title_trimmed}\n\n"));
579 if let Some(sev) = input.severity.as_deref().filter(|s| !s.is_empty()) {
580 skill_md.push_str(&format!("**Severity:** {sev}\n\n"));
581 }
582 skill_md.push_str(body_trimmed);
583 skill_md.push('\n');
584
585 let base_dir = crate::skill_fs::skills_base_dir()
589 .map_err(CoreError::Internal)?
590 .join("local");
591 std::fs::create_dir_all(&base_dir)
592 .map_err(|e| CoreError::Internal(format!("failed to create skills dir: {e}")))?;
593 let canonical_base = base_dir
594 .canonicalize()
595 .map_err(|e| CoreError::Internal(format!("failed to resolve skills dir: {e}")))?;
596 let skill_dir = base_dir.join(&id);
597 let skill_dir_for_check = canonical_base.join(&id);
598 if !skill_dir_for_check.starts_with(&canonical_base) {
599 return Err(CoreError::Validation(
600 "remember_rule: invalid slug after sanitization".into(),
601 ));
602 }
603 std::fs::create_dir_all(&skill_dir)
604 .map_err(|e| CoreError::Internal(format!("failed to create skill directory: {e}")))?;
605 let canonical_skill = skill_dir
606 .canonicalize()
607 .map_err(|e| CoreError::Internal(format!("failed to resolve skill directory: {e}")))?;
608 if !canonical_skill.starts_with(&canonical_base) {
609 return Err(CoreError::Validation("remember_rule: path escape".into()));
610 }
611 std::fs::write(skill_dir.join("SKILL.md"), &skill_md)
612 .map_err(|e| CoreError::Internal(format!("failed to write SKILL.md: {e}")))?;
613
614 let engines_json = serde_json::to_string(&["claude"])?;
615 let tags_vec: Vec<String> = if origin == "conversation" {
620 vec!["conversation".into()]
621 } else {
622 vec![origin.clone(), "conversation".into()]
623 };
624 let tags_json = serde_json::to_string(&tags_vec)?;
625 let description = body_trimmed.to_owned();
626 let confidence: f64 =
630 confidence_override.map_or(0.6, |c| c.clamp(0.0, REMEMBER_CONVERSATION_CONFIDENCE_CAP));
631
632 let insert_id = id.as_str();
633 let insert_directory = id.as_str();
634 let insert_description = description.as_str();
635 let insert_engines = engines_json.as_str();
636 let insert_tags = tags_json.as_str();
637 let insert_file_patterns = file_patterns_json.as_deref();
638 let insert_now = now.as_str();
639 let insert_origin = origin.as_str();
640 let insert_content_hash = content_hash.as_str();
641 let insert_result = sqlx::query!(
642 "INSERT INTO skills
643 (id, name, source, directory, version, description, type, engines, tags,
644 trigger, check_prompt, file_patterns, enabled_for_claude, confidence_score,
645 installed_at, updated_at, origin, content_hash, hash_created_at)
646 VALUES (?1, ?2, 'local', ?3, '1.0.0', ?4, 'review_standard', ?5, ?6,
647 NULL, NULL, ?7, 1, ?8, ?9, ?9, ?10, ?11, ?12)",
648 insert_id,
649 title_trimmed,
650 insert_directory,
651 insert_description,
652 insert_engines,
653 insert_tags,
654 insert_file_patterns,
655 confidence,
656 insert_now,
657 insert_origin,
658 insert_content_hash,
659 now_ms
660 )
661 .execute(db)
662 .await;
663 if let Err(e) = insert_result {
664 let _ = std::fs::remove_dir_all(&skill_dir);
665 return Err(e.into());
666 }
667
668 if let Err(e) = crate::skill_fs::sync_engine_link("local", &id, "claude", true) {
670 eprintln!("warning: sync_engine_link failed for engine claude: {e}");
671 record_engine_link_failure(db, &id, "claude", &e).await;
672 }
673
674 if let (Some(bad), Some(good)) = (input.bad_code.as_deref(), input.good_code.as_deref()) {
677 let bad = sanitize_remember_text(bad);
678 let good = sanitize_remember_text(good);
679 if !bad.trim().is_empty() && !good.trim().is_empty() {
680 let example_input = AddExampleInput {
681 skill_id: id.clone(),
682 bad_code: bad,
683 good_code: good,
684 description: None,
685 source: Some(origin.clone()),
686 };
687 if let Err(e) = add_example(db, example_input).await {
688 eprintln!("warning: failed to attach example to remembered rule: {e}");
689 }
690 }
691 }
692
693 let row = sqlx::query_as!(
694 SkillRow,
695 "SELECT id, name, source, directory, version, description, type, \
696 engines, tags, trigger, check_prompt, repo_owner, repo_name, repo_branch, readme_url, \
697 enabled_for_codex, enabled_for_claude, enabled_for_gemini, enabled_for_cursor, \
698 installed_at, updated_at, origin FROM skills WHERE id = ?1",
699 id
700 )
701 .fetch_one(db)
702 .await?;
703 let captures_today = count_captures_today(db, &origin).await?;
704 Ok(RememberOutcome {
705 skill: SkillRecord::from(row),
706 deduped: false,
707 dedup_window_hit: false,
708 confidence_after: confidence,
709 captures_today,
710 })
711}