1use sqlx::SqlitePool;
2
3use crate::errors::CoreError;
4
/// A rule flattened into the shape used for indexing and retrieval.
///
/// Instances are produced from a `RuleRow` through the `From` impl in this
/// file, which also derives `content`, `language` and `repo_scope`.
#[derive(Clone, Debug)]
// NOTE(review): the reason for this allow is not visible in this file —
// presumably not every field is read by every consumer; confirm or remove.
#[allow(dead_code)]
pub struct RuleDocument {
    /// Identifier of the originating `skills` row (`id` column).
    pub skill_id: String,
    /// Human-readable rule name (`name` column).
    pub title: String,
    /// Text block combining id, name, type, optional source repo, tags and
    /// the rule description.
    pub content: String,
    /// Stored confidence score (`confidence_score` column).
    pub confidence: f64,
    /// Raw `file_patterns` column; parsed elsewhere in this file as a JSON
    /// array of strings.
    pub file_patterns: Option<String>,
    /// Language derived from the tags, falling back to the file patterns.
    pub language: Option<String>,
    /// Lower-cased `owner/name` derived from the `source_repo` column.
    pub repo_scope: Option<String>,
}
22
/// Snapshot describing the rule set an index was (or would be) built from.
///
/// Two states compare equal only when every field matches.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RuleIndexState {
    /// Number of rules covered by the snapshot.
    pub rule_count: i64,
    /// Largest `updated_at` value among the covered rules, if any.
    pub max_updated_at: Option<String>,
    /// Embedding profile that was active when the snapshot was taken.
    pub embedding_profile: String,
    /// Optional signature of a restricted rule subset.
    // NOTE(review): presumably the output of `scope_signature_from_skill_ids`;
    // `load_rule_index_state` always leaves it `None` — confirm with callers.
    pub scope_signature: Option<String>,
}
38
39pub fn scope_signature_from_skill_ids<'a>(
45 skill_ids: impl IntoIterator<Item = &'a str>,
46) -> Option<String> {
47 use sha1::{Digest, Sha1};
48 let mut ids: Vec<&str> = skill_ids.into_iter().collect();
49 if ids.is_empty() {
50 return None;
51 }
52 ids.sort_unstable();
53 ids.dedup();
54 let mut hasher = Sha1::new();
55 for id in ids {
56 hasher.update(id.as_bytes());
57 hasher.update(b"\0");
59 }
60 let digest = hasher.finalize();
61 let mut hex = String::with_capacity(digest.len() * 2);
62 for byte in digest {
63 hex.push_str(&format!("{byte:02x}"));
64 }
65 Some(hex)
66}
67
68#[derive(sqlx::FromRow)]
69struct RuleRow {
70 id: String,
71 name: String,
72 description: String,
73 r#type: String,
74 tags: String,
75 confidence_score: f64,
76 file_patterns: Option<String>,
77 source_repo: Option<String>,
78}
79
/// Lower-case tag values that `language_from_tags` recognises as a language.
///
/// `c++` and `c#` are accepted as aliases and canonicalised by the caller.
const LANGUAGE_TAGS: &[&str] = &[
    "rust", "typescript", "javascript", "python", "go",
    "java", "kotlin", "swift", "ruby", "php",
    "cpp", "c++", "csharp", "c#", "c",
];
101
102pub fn language_from_tags(tags_json: &str) -> Option<String> {
112 let trimmed = tags_json.trim();
113 if trimmed.is_empty() {
114 return None;
115 }
116 let tags: Vec<String> = serde_json::from_str(trimmed).ok()?;
117 for tag in tags {
118 let lower = tag.trim().to_ascii_lowercase();
119 if LANGUAGE_TAGS.iter().any(|known| *known == lower) {
120 let canonical = match lower.as_str() {
123 "c++" => "cpp".to_owned(),
124 "c#" => "csharp".to_owned(),
125 other => other.to_owned(),
126 };
127 return Some(canonical);
128 }
129 }
130 None
131}
132
133pub fn confidence_from_tags(tags_json: &str) -> Option<f64> {
146 let trimmed = tags_json.trim();
147 if trimmed.is_empty() {
148 return None;
149 }
150 let tags: Vec<String> = serde_json::from_str(trimmed).ok()?;
151 let mut cluster_size: Option<u32> = None;
152 let mut severity: Option<String> = None;
153 for tag in &tags {
154 let lower = tag.trim().to_ascii_lowercase();
155 if let Some(rest) = lower.strip_prefix("cluster-size:") {
156 if let Ok(n) = rest.parse::<u32>() {
157 cluster_size = Some(n);
158 }
159 } else if let Some(rest) = lower.strip_prefix("severity:") {
160 severity = Some(rest.to_owned());
161 }
162 }
163 if cluster_size.is_none() && severity.is_none() {
164 return None;
165 }
166 let base_score = if let Some(n) = cluster_size {
167 match n {
168 0 | 1 => 0.55, 2 => 0.7,
170 3..=4 => 0.8,
171 _ => 0.9, }
173 } else {
174 0.7
175 };
176 let score = if let Some(sev) = severity.as_deref() {
177 match sev {
178 "error" => f64::min(base_score + 0.05, 0.95),
179 "info" => f64::max(base_score - 0.05, 0.4),
180 _ => base_score, }
182 } else {
183 base_score
184 };
185 Some(score)
186}
187
/// Maps a single file pattern to a language via its trailing extension.
///
/// Only the text after the last `.` is inspected, case-insensitively (ASCII).
/// Patterns without a dot, or whose final segment is not a known extension
/// (including segments that still contain `/` or `*`), yield `None`.
fn language_from_pattern(p: &str) -> Option<&'static str> {
    let (_, suffix) = p.rsplit_once('.')?;
    let language = match suffix.to_ascii_lowercase().as_str() {
        "rs" => "rust",
        "ts" | "tsx" => "typescript",
        "js" | "jsx" | "mjs" | "cjs" => "javascript",
        "py" | "pyi" => "python",
        "go" => "go",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "rb" => "ruby",
        "php" => "php",
        "cpp" | "cc" | "cxx" | "hpp" => "cpp",
        "cs" => "csharp",
        // Unknown extension, or a suffix containing glob/path characters.
        _ => return None,
    };
    Some(language)
}
214
215pub fn language_from_file_patterns(file_patterns_json: Option<&str>) -> Option<String> {
221 let raw = file_patterns_json?.trim();
222 if raw.is_empty() {
223 return None;
224 }
225 let patterns: Vec<String> = serde_json::from_str(raw).ok()?;
226 let mut seen: Option<&'static str> = None;
227 for p in &patterns {
228 if let Some(lang) = language_from_pattern(p) {
229 match seen {
230 None => seen = Some(lang),
231 Some(existing) if existing == lang => {}
232 Some(_) => return None,
233 }
234 }
235 }
236 seen.map(String::from)
237}
238
/// Normalises a `source_repo` value into a lower-case `owner/name` scope.
///
/// The input is split at the first `/`; both halves are trimmed and must be
/// non-empty. Returns `None` when the input is absent, has no `/`, or either
/// half is blank.
pub fn repo_scope_from_source_repo(source_repo: Option<&str>) -> Option<String> {
    let (owner, name) = source_repo?.trim().split_once('/')?;
    let owner = owner.trim();
    let name = name.trim();
    if owner.is_empty() || name.is_empty() {
        return None;
    }
    Some(format!("{}/{}", owner, name).to_ascii_lowercase())
}
249
250impl From<RuleRow> for RuleDocument {
251 fn from(r: RuleRow) -> Self {
252 let language = language_from_tags(&r.tags)
253 .or_else(|| language_from_file_patterns(r.file_patterns.as_deref()));
254 let repo_scope = repo_scope_from_source_repo(r.source_repo.as_deref());
255 let content = match repo_scope.as_deref() {
259 Some(scope) => format!(
260 "Rule ID: {}\nRule Name: {}\nType: {}\nSource: {}\nTags: {}\n\n{}",
261 r.id, r.name, r.r#type, scope, r.tags, r.description
262 ),
263 None => format!(
264 "Rule ID: {}\nRule Name: {}\nType: {}\nTags: {}\n\n{}",
265 r.id, r.name, r.r#type, r.tags, r.description
266 ),
267 };
268 Self {
269 skill_id: r.id,
270 title: r.name,
271 content,
272 confidence: r.confidence_score,
273 file_patterns: r.file_patterns,
274 language,
275 repo_scope,
276 }
277 }
278}
279
280pub async fn load_rules_from_db(pool: &SqlitePool) -> Result<Vec<RuleDocument>, CoreError> {
281 load_rules_from_db_for_engine(pool, None).await
282}
283
284pub async fn load_rule_index_state(pool: &SqlitePool) -> Result<RuleIndexState, CoreError> {
285 let row = sqlx::query!(
290 "SELECT COUNT(*) AS rule_count, MAX(updated_at) AS max_updated_at FROM skills WHERE status = 'active'"
291 )
292 .fetch_one(pool)
293 .await?;
294 Ok(RuleIndexState {
295 rule_count: row.rule_count,
296 max_updated_at: row.max_updated_at,
297 embedding_profile: crate::context::embedding::active_embedding_profile().await,
298 scope_signature: None,
302 })
303}
304
305pub async fn load_rules_from_db_for_engine(
306 pool: &SqlitePool,
307 engine: Option<&str>,
308) -> Result<Vec<RuleDocument>, CoreError> {
309 let rows = match engine {
315 Some("codex") => {
316 sqlx::query_as::<_, RuleRow>(
317 "SELECT id, name, description, type as \"type\", tags, confidence_score, \
318 file_patterns, source_repo FROM skills \
319 WHERE enabled_for_codex = 1 AND status = 'active'",
320 )
321 .fetch_all(pool)
322 .await?
323 }
324 Some("claude") => {
325 sqlx::query_as::<_, RuleRow>(
326 "SELECT id, name, description, type as \"type\", tags, confidence_score, \
327 file_patterns, source_repo FROM skills \
328 WHERE enabled_for_claude = 1 AND status = 'active'",
329 )
330 .fetch_all(pool)
331 .await?
332 }
333 Some("gemini") => {
334 sqlx::query_as::<_, RuleRow>(
335 "SELECT id, name, description, type as \"type\", tags, confidence_score, \
336 file_patterns, source_repo FROM skills \
337 WHERE enabled_for_gemini = 1 AND status = 'active'",
338 )
339 .fetch_all(pool)
340 .await?
341 }
342 Some("cursor") => {
343 sqlx::query_as::<_, RuleRow>(
344 "SELECT id, name, description, type as \"type\", tags, confidence_score, \
345 file_patterns, source_repo FROM skills \
346 WHERE enabled_for_cursor = 1 AND status = 'active'",
347 )
348 .fetch_all(pool)
349 .await?
350 }
351 _ => {
352 sqlx::query_as::<_, RuleRow>(
353 "SELECT id, name, description, type as \"type\", tags, confidence_score, \
354 file_patterns, source_repo FROM skills \
355 WHERE status = 'active'",
356 )
357 .fetch_all(pool)
358 .await?
359 }
360 };
361
362 Ok(rows.into_iter().map(RuleDocument::from).collect())
363}
364
365pub async fn load_rule_confidence_map(
382 pool: &SqlitePool,
383) -> Result<std::collections::HashMap<String, f64>, CoreError> {
384 let rows = sqlx::query!("SELECT id, confidence_score FROM skills WHERE status = 'active'")
385 .fetch_all(pool)
386 .await?;
387 Ok(rows
388 .into_iter()
389 .map(|row| (row.id, row.confidence_score))
390 .collect())
391}
392
/// Optional per-rule signals bundled together for ranking.
///
/// Each map is keyed by skill id. A field is `None` when its data is not
/// available; `load_rule_ranking_inputs` leaves a field `None` when the
/// corresponding loader fails.
#[derive(Clone, Debug, Default)]
pub struct RuleRankingInputs {
    /// Stored confidence score per skill id.
    pub confidence_map: Option<std::collections::HashMap<String, f64>>,
    /// Age in days per skill id.
    pub age_days_map: Option<std::collections::HashMap<String, f32>>,
}
402
403pub async fn load_rule_ranking_inputs(pool: &SqlitePool) -> RuleRankingInputs {
404 RuleRankingInputs {
405 confidence_map: load_rule_confidence_map(pool).await.ok(),
406 age_days_map: load_rule_age_days_map(pool).await.ok(),
407 }
408}
409
410pub async fn load_rule_age_days_map(
421 pool: &SqlitePool,
422) -> Result<std::collections::HashMap<String, f32>, CoreError> {
423 use sqlx::Row;
424 let rows = sqlx::query(
425 "SELECT id, COALESCE(created_at, updated_at) AS ts \
426 FROM skills WHERE status = 'active'",
427 )
428 .fetch_all(pool)
429 .await?;
430 let now = chrono::Utc::now();
431 let mut out = std::collections::HashMap::with_capacity(rows.len());
432 for row in rows {
433 let id: String = row.try_get("id").unwrap_or_default();
434 if id.is_empty() {
435 continue;
436 }
437 let ts: Option<String> = row.try_get("ts").ok();
438 let Some(ts) = ts else { continue };
439 let parsed = chrono::DateTime::parse_from_rfc3339(&ts)
445 .map(|dt| dt.with_timezone(&chrono::Utc))
446 .ok()
447 .or_else(|| {
448 chrono::NaiveDateTime::parse_from_str(&ts, "%Y-%m-%d %H:%M:%S")
449 .ok()
450 .map(|n| n.and_utc())
451 })
452 .or_else(|| {
453 chrono::NaiveDateTime::parse_from_str(&ts, "%Y-%m-%dT%H:%M:%S%.f")
454 .ok()
455 .map(|n| n.and_utc())
456 });
457 if let Some(created) = parsed {
458 let age_days = (now - created).num_seconds().max(0) as f32 / 86_400.0;
459 out.insert(id, age_days);
460 }
461 }
462 Ok(out)
463}
464
465pub async fn load_rule_examples(
467 pool: &SqlitePool,
468 skill_id: &str,
469) -> Result<Vec<RuleExample>, CoreError> {
470 let rows = sqlx::query_as!(
471 RuleExampleRow,
472 "SELECT id, skill_id, bad_code, good_code, description, source \
473 FROM rule_examples WHERE skill_id = ?1 ORDER BY created_at DESC LIMIT 3",
474 skill_id
475 )
476 .fetch_all(pool)
477 .await?;
478 Ok(rows.into_iter().map(RuleExample::from).collect())
479}
480
481pub async fn load_rule_examples_batch(
483 pool: &SqlitePool,
484 skill_ids: &[String],
485) -> Result<std::collections::HashMap<String, Vec<RuleExample>>, CoreError> {
486 if skill_ids.is_empty() {
487 return Ok(std::collections::HashMap::new());
488 }
489 let ids_json = serde_json::to_string(skill_ids)
490 .map_err(|e| CoreError::Internal(format!("serialize skill_ids: {e}")))?;
491 let rows = sqlx::query_as!(
492 RuleExampleRow,
493 "SELECT id, skill_id, bad_code, good_code, description, source \
494 FROM rule_examples \
495 WHERE skill_id IN (SELECT value FROM json_each(?1)) \
496 ORDER BY created_at DESC",
497 ids_json,
498 )
499 .fetch_all(pool)
500 .await?;
501
502 let mut map: std::collections::HashMap<String, Vec<RuleExample>> =
503 std::collections::HashMap::new();
504 for row in rows {
505 let skill_id = row.skill_id.clone();
506 let example = RuleExample::from(row);
507 map.entry(skill_id).or_default().push(example);
508 }
509 for examples in map.values_mut() {
511 examples.truncate(3);
512 }
513 Ok(map)
514}
515
/// A bad/good code pair illustrating a rule.
#[derive(Clone, Debug)]
pub struct RuleExample {
    /// Identifier of the example row.
    pub id: String,
    /// Identifier of the skill this example belongs to.
    pub skill_id: String,
    /// Code snippet from the `bad_code` column.
    pub bad_code: String,
    /// Code snippet from the `good_code` column.
    pub good_code: String,
    /// Optional free-text description.
    pub description: Option<String>,
    /// Value of the `source` column.
    pub source: String,
}
525
526#[derive(sqlx::FromRow)]
527struct RuleExampleRow {
528 id: String,
529 skill_id: String,
530 bad_code: String,
531 good_code: String,
532 description: Option<String>,
533 source: String,
534}
535
536impl From<RuleExampleRow> for RuleExample {
537 fn from(r: RuleExampleRow) -> Self {
538 Self {
539 id: r.id,
540 skill_id: r.skill_id,
541 bad_code: r.bad_code,
542 good_code: r.good_code,
543 description: r.description,
544 source: r.source,
545 }
546 }
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `tags_json` yields a confidence within 1e-9 of `expected`.
    fn assert_confidence(tags_json: &str, expected: f64) {
        let actual = confidence_from_tags(tags_json).unwrap();
        assert!((actual - expected).abs() < 1e-9, "got {actual}");
    }

    /// A cluster of one gets the lowest base score.
    #[test]
    fn confidence_from_tags_singleton_downweighted() {
        assert_confidence(r#"["auto-from-extractions","cluster-size:1"]"#, 0.55);
    }

    /// Clusters above four get the highest base score; `warning` is neutral.
    #[test]
    fn confidence_from_tags_large_cluster_strongest() {
        assert_confidence(r#"["cluster-size:8","severity:warning"]"#, 0.9);
    }

    /// `severity:error` adds 0.05 to the base score.
    #[test]
    fn confidence_from_tags_severity_error_boosts() {
        assert_confidence(r#"["cluster-size:3","severity:error"]"#, 0.85);
    }

    /// `severity:info` subtracts 0.05 from the base score.
    #[test]
    fn confidence_from_tags_severity_info_dampens() {
        assert_confidence(r#"["cluster-size:1","severity:info"]"#, 0.50);
    }

    /// Without cluster or severity evidence there is no score at all.
    #[test]
    fn confidence_from_tags_missing_evidence_returns_none() {
        let inputs = [
            r#"["auto-from-extractions","origin:review-extraction"]"#,
            "[]",
            "",
            "not-json",
        ];
        for input in inputs {
            assert_eq!(confidence_from_tags(input), None);
        }
    }

    /// Recognised tags, aliases, and inputs that carry no language.
    #[test]
    fn language_from_tags_table() {
        let lang = |tags: &str| language_from_tags(tags);

        assert_eq!(
            lang(r#"["async", "rust", "concurrency"]"#).as_deref(),
            Some("rust")
        );
        assert_eq!(
            lang(r#"["typescript", "react"]"#).as_deref(),
            Some("typescript")
        );
        assert_eq!(lang(r#"["c++"]"#).as_deref(), Some("cpp"));
        assert_eq!(lang(r#"["C#"]"#).as_deref(), Some("csharp"));
        assert_eq!(lang("[]"), None);
        assert_eq!(lang(""), None);
        assert_eq!(lang("not-json"), None);
        assert_eq!(lang(r#"["lint", "performance"]"#), None);
    }

    /// Patterns that all point at one language resolve to it.
    #[test]
    fn language_from_file_patterns_resolves_single_language() {
        let cases = [
            (r#"["**/*.rs"]"#, "rust"),
            (r#"["**/*.ts","**/*.tsx"]"#, "typescript"),
            (r#"["src/**/*.go","tests/**/*.go"]"#, "go"),
        ];
        for (patterns, language) in cases {
            assert_eq!(
                language_from_file_patterns(Some(patterns)).as_deref(),
                Some(language),
                "patterns: {patterns}"
            );
        }
    }

    /// Mixed-language or extension-less patterns give no language.
    #[test]
    fn language_from_file_patterns_returns_none_for_mixed_or_universal() {
        let cases = [
            r#"["**/*.rs","**/*.go"]"#,
            r#"["**/*"]"#,
            r#"["**/*test*"]"#,
        ];
        for patterns in cases {
            assert_eq!(
                language_from_file_patterns(Some(patterns)),
                None,
                "patterns: {patterns}"
            );
        }
    }

    /// Absent, blank, empty-array and malformed input all give `None`.
    #[test]
    fn language_from_file_patterns_handles_missing_or_empty_input() {
        let cases: [Option<&str>; 4] = [None, Some(""), Some("[]"), Some("not-json")];
        for input in cases {
            assert_eq!(language_from_file_patterns(input), None, "input: {input:?}");
        }
    }

    /// Only `owner/name` values with both halves present produce a scope.
    #[test]
    fn repo_scope_uses_canonical_source_repo_only() {
        let scope = repo_scope_from_source_repo(Some("vitejs/vite"));
        assert_eq!(scope.as_deref(), Some("vitejs/vite"));

        for rejected in [None, Some("vitejs"), Some(" /vite")] {
            assert!(
                repo_scope_from_source_repo(rejected).is_none(),
                "input: {rejected:?}"
            );
        }
    }

    /// Order and duplicates do not matter; membership does.
    #[test]
    fn scope_signature_depends_only_on_membership() {
        let sig = |ids: &[&'static str]| scope_signature_from_skill_ids(ids.iter().copied());

        assert_eq!(sig(&["a", "b", "c"]), sig(&["c", "a", "b"]));
        assert_eq!(sig(&["a", "a", "b"]), sig(&["a", "b"]));
        assert_eq!(scope_signature_from_skill_ids(Vec::<&str>::new()), None);
        assert_ne!(sig(&["a", "b"]), sig(&["a", "c"]));
    }

    /// Ids that concatenate to the same text must still hash differently.
    #[test]
    fn scope_signature_length_delimits_to_avoid_collision() {
        let left = scope_signature_from_skill_ids(["ab", "c"]);
        let right = scope_signature_from_skill_ids(["a", "bc"]);
        assert_ne!(left, right);
    }
}