1use chrono::{DateTime, Utc};
13use rusqlite::{params, Connection};
14use serde::{Deserialize, Serialize};
15use std::collections::{HashMap, HashSet};
16
17use crate::error::{EngramError, Result};
18use crate::storage::queries::get_memory;
19use crate::types::{Memory, MemoryId};
20
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
27#[serde(rename_all = "snake_case")]
28pub enum ConflictType {
29 Contradiction,
31 Staleness,
33 Duplicate,
35 SemanticOverlap,
37 MetadataInconsistency,
39}
40
41impl ConflictType {
42 pub fn as_str(&self) -> &'static str {
43 match self {
44 ConflictType::Contradiction => "contradiction",
45 ConflictType::Staleness => "staleness",
46 ConflictType::Duplicate => "duplicate",
47 ConflictType::SemanticOverlap => "semantic_overlap",
48 ConflictType::MetadataInconsistency => "metadata_inconsistency",
49 }
50 }
51}
52
53impl std::str::FromStr for ConflictType {
54 type Err = EngramError;
55
56 fn from_str(s: &str) -> Result<Self> {
57 match s.to_lowercase().as_str() {
58 "contradiction" => Ok(ConflictType::Contradiction),
59 "staleness" => Ok(ConflictType::Staleness),
60 "duplicate" => Ok(ConflictType::Duplicate),
61 "semantic_overlap" => Ok(ConflictType::SemanticOverlap),
62 "metadata_inconsistency" => Ok(ConflictType::MetadataInconsistency),
63 _ => Err(EngramError::InvalidInput(format!(
64 "Unknown conflict type: {}",
65 s
66 ))),
67 }
68 }
69}
70
71#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
73#[serde(rename_all = "snake_case")]
74pub enum ConflictSeverity {
75 Low,
76 Medium,
77 High,
78 Critical,
79}
80
81impl ConflictSeverity {
82 pub fn as_str(&self) -> &'static str {
83 match self {
84 ConflictSeverity::Low => "low",
85 ConflictSeverity::Medium => "medium",
86 ConflictSeverity::High => "high",
87 ConflictSeverity::Critical => "critical",
88 }
89 }
90}
91
92impl std::str::FromStr for ConflictSeverity {
93 type Err = EngramError;
94
95 fn from_str(s: &str) -> Result<Self> {
96 match s.to_lowercase().as_str() {
97 "low" => Ok(ConflictSeverity::Low),
98 "medium" => Ok(ConflictSeverity::Medium),
99 "high" => Ok(ConflictSeverity::High),
100 "critical" => Ok(ConflictSeverity::Critical),
101 _ => Ok(ConflictSeverity::Medium),
102 }
103 }
104}
105
106#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
108#[serde(rename_all = "snake_case")]
109pub enum ResolutionType {
110 KeepA,
112 KeepB,
114 Merge,
116 KeepBoth,
118 DeleteBoth,
120 FalsePositive,
122}
123
124impl ResolutionType {
125 pub fn as_str(&self) -> &'static str {
126 match self {
127 ResolutionType::KeepA => "keep_a",
128 ResolutionType::KeepB => "keep_b",
129 ResolutionType::Merge => "merge",
130 ResolutionType::KeepBoth => "keep_both",
131 ResolutionType::DeleteBoth => "delete_both",
132 ResolutionType::FalsePositive => "false_positive",
133 }
134 }
135}
136
137#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
139#[serde(rename_all = "snake_case")]
140pub enum ValidationStatus {
141 Unverified,
142 Verified,
143 Disputed,
144 Stale,
145}
146
147impl ValidationStatus {
148 pub fn as_str(&self) -> &'static str {
149 match self {
150 ValidationStatus::Unverified => "unverified",
151 ValidationStatus::Verified => "verified",
152 ValidationStatus::Disputed => "disputed",
153 ValidationStatus::Stale => "stale",
154 }
155 }
156}
157
158#[derive(Debug, Clone, Serialize, Deserialize)]
164pub struct MemoryConflict {
165 pub id: i64,
166 pub memory_a_id: MemoryId,
167 pub memory_b_id: MemoryId,
168 pub conflict_type: ConflictType,
169 pub severity: ConflictSeverity,
170 pub description: Option<String>,
171 pub detected_at: DateTime<Utc>,
172 pub resolved_at: Option<DateTime<Utc>>,
173 pub resolution_type: Option<ResolutionType>,
174 pub resolution_notes: Option<String>,
175 pub auto_detected: bool,
176}
177
178#[derive(Debug, Clone, Serialize, Deserialize)]
180pub struct DuplicateCandidate {
181 pub id: i64,
182 pub memory_a_id: MemoryId,
183 pub memory_b_id: MemoryId,
184 pub similarity_score: f32,
185 pub similarity_type: String,
186 pub detected_at: DateTime<Utc>,
187 pub status: String,
188}
189
190#[derive(Debug, Clone, Serialize, Deserialize)]
192pub struct EnhancedQualityScore {
193 pub overall: f32,
194 pub grade: char,
195 pub clarity: f32,
196 pub completeness: f32,
197 pub freshness: f32,
198 pub consistency: f32,
199 pub source_trust: f32,
200 pub suggestions: Vec<QualitySuggestion>,
201 pub calculated_at: DateTime<Utc>,
202}
203
204#[derive(Debug, Clone, Serialize, Deserialize)]
206pub struct QualitySuggestion {
207 pub category: String,
208 pub priority: String,
209 pub message: String,
210 pub action: Option<String>,
211}
212
213#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct SourceTrustScore {
216 pub source_type: String,
217 pub source_identifier: Option<String>,
218 pub trust_score: f32,
219 pub verification_count: i32,
220 pub notes: Option<String>,
221}
222
223#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct QualityReport {
226 pub total_memories: i64,
227 pub average_quality: f32,
228 pub quality_distribution: HashMap<char, i64>,
229 pub top_issues: Vec<QualityIssue>,
230 pub conflicts_count: i64,
231 pub duplicates_count: i64,
232 pub suggestions_summary: Vec<String>,
233 pub generated_at: DateTime<Utc>,
234}
235
236#[derive(Debug, Clone, Serialize, Deserialize)]
238pub struct QualityIssue {
239 pub issue_type: String,
240 pub count: i64,
241 pub severity: String,
242 pub description: String,
243}
244
245#[derive(Debug, Clone, Serialize, Deserialize)]
251pub struct ContextQualityConfig {
252 pub clarity_weight: f32,
254 pub completeness_weight: f32,
256 pub freshness_weight: f32,
258 pub consistency_weight: f32,
260 pub source_trust_weight: f32,
262 pub duplicate_threshold: f32,
264 pub semantic_threshold: f32,
266 pub staleness_days: i64,
268 pub min_content_length: usize,
270 pub ideal_content_length: usize,
272}
273
274impl Default for ContextQualityConfig {
275 fn default() -> Self {
276 Self {
277 clarity_weight: 0.25,
278 completeness_weight: 0.20,
279 freshness_weight: 0.20,
280 consistency_weight: 0.20,
281 source_trust_weight: 0.15,
282 duplicate_threshold: 0.85,
283 semantic_threshold: 0.80,
284 staleness_days: 90,
285 min_content_length: 20,
286 ideal_content_length: 200,
287 }
288 }
289}
290
/// Computes the Jaccard similarity of two texts over character trigrams.
///
/// Both inputs are lowercased and stripped of all whitespace, then split into
/// overlapping 3-character windows. The result is
/// `|A ∩ B| / |A ∪ B|` over the two trigram sets, a value in `[0.0, 1.0]`.
///
/// Texts with fewer than three non-whitespace characters produce no trigrams:
/// - if both texts are that short, the result is `1.0` only when their
///   normalized forms are identical and `0.0` otherwise (so `"ab"` and `"cd"`
///   are not reported as perfect duplicates);
/// - if only one of them is that short, the result is `0.0`.
pub fn calculate_text_similarity(text_a: &str, text_b: &str) -> f32 {
    const NGRAM_SIZE: usize = 3;

    /// Lowercases the text and removes every whitespace character.
    fn normalize(text: &str) -> Vec<char> {
        text.to_lowercase()
            .chars()
            .filter(|c| !c.is_whitespace())
            .collect()
    }

    /// Collects the distinct `n`-character windows of `chars`.
    fn ngrams(chars: &[char], n: usize) -> HashSet<&[char]> {
        chars.windows(n).collect()
    }

    let chars_a = normalize(text_a);
    let chars_b = normalize(text_b);
    let ngrams_a = ngrams(&chars_a, NGRAM_SIZE);
    let ngrams_b = ngrams(&chars_b, NGRAM_SIZE);

    match (ngrams_a.is_empty(), ngrams_b.is_empty()) {
        // Neither text is long enough for a trigram: fall back to an exact
        // comparison instead of declaring unrelated short texts identical.
        (true, true) => return if chars_a == chars_b { 1.0 } else { 0.0 },
        (true, false) | (false, true) => return 0.0,
        (false, false) => {}
    }

    let intersection = ngrams_a.intersection(&ngrams_b).count() as f32;
    let union = ngrams_a.union(&ngrams_b).count() as f32;

    intersection / union
}
331
332pub fn find_near_duplicates(
334 conn: &Connection,
335 threshold: f32,
336 limit: i64,
337) -> Result<Vec<DuplicateCandidate>> {
338 let mut stmt = conn.prepare(
340 r#"
341 SELECT id, content FROM memories
342 WHERE deleted_at IS NULL
343 ORDER BY created_at DESC
344 LIMIT ?
345 "#,
346 )?;
347
348 let memories: Vec<(i64, String)> = stmt
349 .query_map(params![limit * 2], |row| Ok((row.get(0)?, row.get(1)?)))?
350 .filter_map(|r| r.ok())
351 .collect();
352
353 let mut duplicates = Vec::new();
354
355 for i in 0..memories.len() {
357 for j in (i + 1)..memories.len() {
358 let (id_a, content_a) = &memories[i];
359 let (id_b, content_b) = &memories[j];
360
361 let similarity = calculate_text_similarity(content_a, content_b);
362
363 if similarity >= threshold {
364 let exists: bool = conn.query_row(
366 "SELECT 1 FROM duplicate_candidates WHERE memory_a_id = ? AND memory_b_id = ?",
367 params![id_a, id_b],
368 |_| Ok(true),
369 ).unwrap_or(false);
370
371 if !exists {
372 conn.execute(
373 r#"
374 INSERT OR IGNORE INTO duplicate_candidates
375 (memory_a_id, memory_b_id, similarity_score, similarity_type)
376 VALUES (?, ?, ?, 'content')
377 "#,
378 params![id_a, id_b, similarity],
379 )?;
380
381 duplicates.push(DuplicateCandidate {
382 id: 0,
383 memory_a_id: *id_a,
384 memory_b_id: *id_b,
385 similarity_score: similarity,
386 similarity_type: "content".to_string(),
387 detected_at: Utc::now(),
388 status: "pending".to_string(),
389 });
390 }
391 }
392 }
393 }
394
395 Ok(duplicates)
396}
397
398pub fn get_pending_duplicates(conn: &Connection, limit: i64) -> Result<Vec<DuplicateCandidate>> {
400 let mut stmt = conn.prepare(
401 r#"
402 SELECT id, memory_a_id, memory_b_id, similarity_score, similarity_type, detected_at, status
403 FROM duplicate_candidates
404 WHERE status = 'pending'
405 ORDER BY similarity_score DESC
406 LIMIT ?
407 "#,
408 )?;
409
410 let duplicates = stmt
411 .query_map(params![limit], |row| {
412 Ok(DuplicateCandidate {
413 id: row.get(0)?,
414 memory_a_id: row.get(1)?,
415 memory_b_id: row.get(2)?,
416 similarity_score: row.get(3)?,
417 similarity_type: row.get(4)?,
418 detected_at: row
419 .get::<_, String>(5)?
420 .parse()
421 .unwrap_or_else(|_| Utc::now()),
422 status: row.get(6)?,
423 })
424 })?
425 .filter_map(|r| r.ok())
426 .collect();
427
428 Ok(duplicates)
429}
430
431pub fn find_semantic_duplicates(
437 conn: &Connection,
438 query_embedding: &[f32],
439 threshold: f32,
440 limit: i64,
441) -> Result<Vec<DuplicateCandidate>> {
442 let mut stmt = conn.prepare(
444 r#"
445 SELECT m.id, e.embedding
446 FROM memories m
447 JOIN embeddings e ON m.id = e.memory_id
448 WHERE m.deleted_at IS NULL
449 LIMIT ?
450 "#,
451 )?;
452
453 let memories: Vec<(i64, Vec<f32>)> = stmt
454 .query_map(params![limit], |row| {
455 let id: i64 = row.get(0)?;
456 let embedding_blob: Vec<u8> = row.get(1)?;
457 let embedding: Vec<f32> = embedding_blob
458 .chunks(4)
459 .map(|chunk| {
460 let bytes: [u8; 4] = chunk.try_into().unwrap_or([0; 4]);
461 f32::from_le_bytes(bytes)
462 })
463 .collect();
464 Ok((id, embedding))
465 })?
466 .filter_map(|r| r.ok())
467 .collect();
468
469 let mut duplicates = Vec::new();
470
471 for (id, embedding) in &memories {
472 let similarity = cosine_similarity(query_embedding, embedding);
473 if similarity >= threshold {
474 duplicates.push(DuplicateCandidate {
475 id: 0,
476 memory_a_id: 0, memory_b_id: *id,
478 similarity_score: similarity,
479 similarity_type: "semantic".to_string(),
480 detected_at: Utc::now(),
481 status: "pending".to_string(),
482 });
483 }
484 }
485
486 Ok(duplicates)
487}
488
/// Cosine similarity of two vectors.
///
/// Returns `0.0` when the vectors are empty, differ in length, or either has
/// zero magnitude; otherwise `a · b / (|a| * |b|)`.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let comparable = !a.is_empty() && a.len() == b.len();
    if !comparable {
        return 0.0;
    }

    let norm = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    let (norm_a, norm_b) = (norm(a), norm(b));
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    dot / (norm_a * norm_b)
}
505
506pub fn detect_conflicts(
512 conn: &Connection,
513 memory_id: MemoryId,
514 config: &ContextQualityConfig,
515) -> Result<Vec<MemoryConflict>> {
516 let memory = get_memory(conn, memory_id)?;
517 let mut conflicts = Vec::new();
518
519 let mut stmt = conn.prepare(
521 r#"
522 SELECT id, content, tags, updated_at
523 FROM memories
524 WHERE id != ? AND deleted_at IS NULL
525 AND (
526 -- Same workspace
527 workspace = (SELECT workspace FROM memories WHERE id = ?)
528 -- Or overlapping tags
529 OR EXISTS (
530 SELECT 1 FROM json_each(tags) t1
531 WHERE t1.value IN (SELECT value FROM json_each((SELECT tags FROM memories WHERE id = ?)))
532 )
533 )
534 LIMIT 100
535 "#,
536 )?;
537
538 let candidates: Vec<(i64, String, String, String)> = stmt
539 .query_map(params![memory_id, memory_id, memory_id], |row| {
540 Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
541 })?
542 .filter_map(|r| r.ok())
543 .collect();
544
545 for (other_id, other_content, _other_tags, other_updated) in candidates {
546 let memory_date: DateTime<Utc> = memory.updated_at;
548 let other_date: DateTime<Utc> = other_updated.parse().unwrap_or(memory_date);
549 let days_diff = (memory_date - other_date).num_days().abs();
550
551 if days_diff > config.staleness_days {
552 let similarity = calculate_text_similarity(&memory.content, &other_content);
554 if similarity > 0.3 {
555 let conflict = create_conflict(
556 conn,
557 memory_id,
558 other_id,
559 ConflictType::Staleness,
560 ConflictSeverity::Medium,
561 Some(format!(
562 "Memories differ by {} days and have {:.0}% content similarity",
563 days_diff,
564 similarity * 100.0
565 )),
566 )?;
567 conflicts.push(conflict);
568 }
569 }
570
571 let similarity = calculate_text_similarity(&memory.content, &other_content);
573 if similarity >= config.duplicate_threshold {
574 let conflict = create_conflict(
575 conn,
576 memory_id,
577 other_id,
578 ConflictType::Duplicate,
579 ConflictSeverity::High,
580 Some(format!("Content similarity: {:.0}%", similarity * 100.0)),
581 )?;
582 conflicts.push(conflict);
583 } else if similarity >= config.semantic_threshold {
584 let conflict = create_conflict(
585 conn,
586 memory_id,
587 other_id,
588 ConflictType::SemanticOverlap,
589 ConflictSeverity::Low,
590 Some(format!("Semantic overlap: {:.0}%", similarity * 100.0)),
591 )?;
592 conflicts.push(conflict);
593 }
594 }
595
596 Ok(conflicts)
597}
598
599fn create_conflict(
601 conn: &Connection,
602 memory_a_id: MemoryId,
603 memory_b_id: MemoryId,
604 conflict_type: ConflictType,
605 severity: ConflictSeverity,
606 description: Option<String>,
607) -> Result<MemoryConflict> {
608 let now = Utc::now();
609 let now_str = now.to_rfc3339();
610
611 conn.execute(
612 r#"
613 INSERT OR IGNORE INTO memory_conflicts
614 (memory_a_id, memory_b_id, conflict_type, severity, description, detected_at)
615 VALUES (?, ?, ?, ?, ?, ?)
616 "#,
617 params![
618 memory_a_id,
619 memory_b_id,
620 conflict_type.as_str(),
621 severity.as_str(),
622 description,
623 now_str
624 ],
625 )?;
626
627 let id = conn.last_insert_rowid();
628
629 Ok(MemoryConflict {
630 id,
631 memory_a_id,
632 memory_b_id,
633 conflict_type,
634 severity,
635 description,
636 detected_at: now,
637 resolved_at: None,
638 resolution_type: None,
639 resolution_notes: None,
640 auto_detected: true,
641 })
642}
643
644pub fn get_unresolved_conflicts(conn: &Connection, limit: i64) -> Result<Vec<MemoryConflict>> {
646 let mut stmt = conn.prepare(
647 r#"
648 SELECT id, memory_a_id, memory_b_id, conflict_type, severity, description,
649 detected_at, resolved_at, resolution_type, resolution_notes, auto_detected
650 FROM memory_conflicts
651 WHERE resolved_at IS NULL
652 ORDER BY
653 CASE severity
654 WHEN 'critical' THEN 1
655 WHEN 'high' THEN 2
656 WHEN 'medium' THEN 3
657 ELSE 4
658 END,
659 detected_at DESC
660 LIMIT ?
661 "#,
662 )?;
663
664 let conflicts = stmt
665 .query_map(params![limit], |row| {
666 Ok(MemoryConflict {
667 id: row.get(0)?,
668 memory_a_id: row.get(1)?,
669 memory_b_id: row.get(2)?,
670 conflict_type: row
671 .get::<_, String>(3)?
672 .parse()
673 .unwrap_or(ConflictType::Contradiction),
674 severity: row
675 .get::<_, String>(4)?
676 .parse()
677 .unwrap_or(ConflictSeverity::Medium),
678 description: row.get(5)?,
679 detected_at: row
680 .get::<_, String>(6)?
681 .parse()
682 .unwrap_or_else(|_| Utc::now()),
683 resolved_at: row
684 .get::<_, Option<String>>(7)?
685 .and_then(|s| s.parse().ok()),
686 resolution_type: None,
687 resolution_notes: row.get(9)?,
688 auto_detected: row.get::<_, i32>(10)? == 1,
689 })
690 })?
691 .filter_map(|r| r.ok())
692 .collect();
693
694 Ok(conflicts)
695}
696
697pub fn resolve_conflict(
703 conn: &Connection,
704 conflict_id: i64,
705 resolution_type: ResolutionType,
706 notes: Option<&str>,
707) -> Result<()> {
708 let now = Utc::now().to_rfc3339();
709
710 conn.execute(
711 r#"
712 UPDATE memory_conflicts
713 SET resolved_at = ?, resolution_type = ?, resolution_notes = ?
714 WHERE id = ?
715 "#,
716 params![now, resolution_type.as_str(), notes, conflict_id],
717 )?;
718
719 let (memory_a_id, memory_b_id): (i64, i64) = conn.query_row(
721 "SELECT memory_a_id, memory_b_id FROM memory_conflicts WHERE id = ?",
722 params![conflict_id],
723 |row| Ok((row.get(0)?, row.get(1)?)),
724 )?;
725
726 match resolution_type {
727 ResolutionType::KeepA => {
728 conn.execute(
730 "UPDATE memories SET lifecycle_state = 'archived' WHERE id = ?",
731 params![memory_b_id],
732 )?;
733 }
734 ResolutionType::KeepB => {
735 conn.execute(
737 "UPDATE memories SET lifecycle_state = 'archived' WHERE id = ?",
738 params![memory_a_id],
739 )?;
740 }
741 ResolutionType::DeleteBoth => {
742 let now = Utc::now().to_rfc3339();
743 conn.execute(
744 "UPDATE memories SET deleted_at = ? WHERE id IN (?, ?)",
745 params![now, memory_a_id, memory_b_id],
746 )?;
747 }
748 _ => {
749 }
751 }
752
753 Ok(())
754}
755
756pub fn calculate_quality_score(
762 conn: &Connection,
763 memory_id: MemoryId,
764 config: &ContextQualityConfig,
765) -> Result<EnhancedQualityScore> {
766 let memory = get_memory(conn, memory_id)?;
767
768 let clarity = score_clarity(&memory);
769 let completeness = score_completeness(&memory, config);
770 let freshness = score_freshness(&memory, config);
771 let consistency = score_consistency(conn, memory_id)?;
772 let source_trust = get_source_trust_for_memory(conn, &memory)?;
773
774 let overall = clarity * config.clarity_weight
775 + completeness * config.completeness_weight
776 + freshness * config.freshness_weight
777 + consistency * config.consistency_weight
778 + source_trust * config.source_trust_weight;
779
780 let grade = match overall {
781 s if s >= 0.9 => 'A',
782 s if s >= 0.8 => 'B',
783 s if s >= 0.7 => 'C',
784 s if s >= 0.6 => 'D',
785 _ => 'F',
786 };
787
788 let suggestions = generate_quality_suggestions(
789 &memory,
790 clarity,
791 completeness,
792 freshness,
793 consistency,
794 source_trust,
795 );
796
797 let now = Utc::now();
799 conn.execute(
800 r#"
801 INSERT INTO quality_history
802 (memory_id, quality_score, clarity_score, completeness_score, freshness_score, consistency_score, source_trust_score)
803 VALUES (?, ?, ?, ?, ?, ?, ?)
804 "#,
805 params![memory_id, overall, clarity, completeness, freshness, consistency, source_trust],
806 )?;
807
808 conn.execute(
810 "UPDATE memories SET quality_score = ? WHERE id = ?",
811 params![overall, memory_id],
812 )?;
813
814 Ok(EnhancedQualityScore {
815 overall,
816 grade,
817 clarity,
818 completeness,
819 freshness,
820 consistency,
821 source_trust,
822 suggestions,
823 calculated_at: now,
824 })
825}
826
827fn score_clarity(memory: &Memory) -> f32 {
828 let content = &memory.content;
829 let mut score: f32 = 0.5;
830
831 let sentence_count =
833 content.matches('.').count() + content.matches('!').count() + content.matches('?').count();
834 if sentence_count > 0 {
835 score += 0.15;
836 }
837
838 let word_count = content.split_whitespace().count();
840 if word_count > 0 {
841 let avg_word_len: f32 = content
842 .split_whitespace()
843 .map(|w| w.len() as f32)
844 .sum::<f32>()
845 / word_count as f32;
846
847 if (3.0..=10.0).contains(&avg_word_len) {
848 score += 0.2;
849 }
850 }
851
852 if !memory.tags.is_empty() {
854 score += 0.15;
855 }
856
857 score.min(1.0)
858}
859
860fn score_completeness(memory: &Memory, config: &ContextQualityConfig) -> f32 {
861 let len = memory.content.len();
862
863 if len < config.min_content_length {
864 return 0.3;
865 }
866
867 if len >= config.ideal_content_length {
868 return 1.0;
869 }
870
871 let range = (config.ideal_content_length - config.min_content_length) as f32;
872 let progress = (len - config.min_content_length) as f32;
873 0.3 + 0.7 * (progress / range)
874}
875
876fn score_freshness(memory: &Memory, config: &ContextQualityConfig) -> f32 {
877 let age_days = (Utc::now() - memory.updated_at).num_days() as f32;
878 let staleness = config.staleness_days as f32;
879
880 if age_days <= 0.0 {
881 1.0
882 } else if age_days >= staleness {
883 0.2
884 } else {
885 1.0 - 0.8 * (age_days / staleness)
886 }
887}
888
889fn score_consistency(conn: &Connection, memory_id: MemoryId) -> Result<f32> {
890 let conflict_count: i64 = conn.query_row(
892 r#"
893 SELECT COUNT(*) FROM memory_conflicts
894 WHERE (memory_a_id = ? OR memory_b_id = ?) AND resolved_at IS NULL
895 "#,
896 params![memory_id, memory_id],
897 |row| row.get(0),
898 )?;
899
900 Ok(match conflict_count {
901 0 => 1.0,
902 1 => 0.7,
903 2 => 0.5,
904 _ => 0.3,
905 })
906}
907
908fn get_source_trust_for_memory(conn: &Connection, memory: &Memory) -> Result<f32> {
909 let source_type = memory
911 .metadata
912 .get("origin")
913 .and_then(|v| v.as_str())
914 .unwrap_or("user");
915
916 let trust_score: f32 = conn
917 .query_row(
918 "SELECT trust_score FROM source_trust_scores WHERE source_type = ?",
919 params![source_type],
920 |row| row.get(0),
921 )
922 .unwrap_or(0.7);
923
924 Ok(trust_score)
925}
926
927fn generate_quality_suggestions(
928 memory: &Memory,
929 clarity: f32,
930 completeness: f32,
931 freshness: f32,
932 consistency: f32,
933 _source_trust: f32,
934) -> Vec<QualitySuggestion> {
935 let mut suggestions = Vec::new();
936
937 if completeness < 0.5 {
938 suggestions.push(QualitySuggestion {
939 category: "completeness".to_string(),
940 priority: "high".to_string(),
941 message: "Add more detail to make this memory more useful".to_string(),
942 action: Some("expand".to_string()),
943 });
944 }
945
946 if clarity < 0.5 {
947 suggestions.push(QualitySuggestion {
948 category: "clarity".to_string(),
949 priority: "medium".to_string(),
950 message: "Consider adding structure with clear sentences".to_string(),
951 action: Some("restructure".to_string()),
952 });
953 }
954
955 if memory.tags.is_empty() {
956 suggestions.push(QualitySuggestion {
957 category: "organization".to_string(),
958 priority: "low".to_string(),
959 message: "Add tags to improve organization and searchability".to_string(),
960 action: Some("add_tags".to_string()),
961 });
962 }
963
964 if freshness < 0.3 {
965 suggestions.push(QualitySuggestion {
966 category: "freshness".to_string(),
967 priority: "medium".to_string(),
968 message: "This memory may be outdated - consider reviewing".to_string(),
969 action: Some("review".to_string()),
970 });
971 }
972
973 if consistency < 0.5 {
974 suggestions.push(QualitySuggestion {
975 category: "consistency".to_string(),
976 priority: "high".to_string(),
977 message: "This memory has unresolved conflicts - review and resolve".to_string(),
978 action: Some("resolve_conflicts".to_string()),
979 });
980 }
981
982 suggestions
983}
984
985pub fn generate_quality_report(
991 conn: &Connection,
992 workspace: Option<&str>,
993) -> Result<QualityReport> {
994 let workspace_filter = workspace.unwrap_or("default");
995
996 let total_memories: i64 = conn.query_row(
998 "SELECT COUNT(*) FROM memories WHERE workspace = ? AND deleted_at IS NULL",
999 params![workspace_filter],
1000 |row| row.get(0),
1001 )?;
1002
1003 let average_quality: f32 = conn
1005 .query_row(
1006 "SELECT COALESCE(AVG(quality_score), 0.5) FROM memories WHERE workspace = ? AND deleted_at IS NULL",
1007 params![workspace_filter],
1008 |row| row.get(0),
1009 )
1010 .unwrap_or(0.5);
1011
1012 let mut distribution = HashMap::new();
1014 let grades = ['A', 'B', 'C', 'D', 'F'];
1015 for grade in grades {
1016 let (min, max) = match grade {
1017 'A' => (0.9, 1.1),
1018 'B' => (0.8, 0.9),
1019 'C' => (0.7, 0.8),
1020 'D' => (0.6, 0.7),
1021 _ => (0.0, 0.6),
1022 };
1023 let count: i64 = conn.query_row(
1024 "SELECT COUNT(*) FROM memories WHERE workspace = ? AND deleted_at IS NULL AND quality_score >= ? AND quality_score < ?",
1025 params![workspace_filter, min, max],
1026 |row| row.get(0),
1027 ).unwrap_or(0);
1028 distribution.insert(grade, count);
1029 }
1030
1031 let conflicts_count: i64 = conn.query_row(
1033 "SELECT COUNT(*) FROM memory_conflicts WHERE resolved_at IS NULL",
1034 [],
1035 |row| row.get(0),
1036 )?;
1037
1038 let duplicates_count: i64 = conn.query_row(
1040 "SELECT COUNT(*) FROM duplicate_candidates WHERE status = 'pending'",
1041 [],
1042 |row| row.get(0),
1043 )?;
1044
1045 let mut top_issues = Vec::new();
1047
1048 if conflicts_count > 0 {
1049 top_issues.push(QualityIssue {
1050 issue_type: "conflicts".to_string(),
1051 count: conflicts_count,
1052 severity: "high".to_string(),
1053 description: format!("{} unresolved conflicts detected", conflicts_count),
1054 });
1055 }
1056
1057 if duplicates_count > 0 {
1058 top_issues.push(QualityIssue {
1059 issue_type: "duplicates".to_string(),
1060 count: duplicates_count,
1061 severity: "medium".to_string(),
1062 description: format!("{} potential duplicates found", duplicates_count),
1063 });
1064 }
1065
1066 let low_quality_count: i64 = conn.query_row(
1068 "SELECT COUNT(*) FROM memories WHERE workspace = ? AND deleted_at IS NULL AND quality_score < 0.5",
1069 params![workspace_filter],
1070 |row| row.get(0),
1071 ).unwrap_or(0);
1072
1073 if low_quality_count > 0 {
1074 top_issues.push(QualityIssue {
1075 issue_type: "low_quality".to_string(),
1076 count: low_quality_count,
1077 severity: "medium".to_string(),
1078 description: format!("{} memories with low quality scores", low_quality_count),
1079 });
1080 }
1081
1082 let suggestions_summary = vec![
1083 format!("Average quality score: {:.0}%", average_quality * 100.0),
1084 format!("Total memories: {}", total_memories),
1085 if conflicts_count > 0 {
1086 format!(
1087 "Resolve {} conflicts to improve consistency",
1088 conflicts_count
1089 )
1090 } else {
1091 "No conflicts detected".to_string()
1092 },
1093 ];
1094
1095 Ok(QualityReport {
1096 total_memories,
1097 average_quality,
1098 quality_distribution: distribution,
1099 top_issues,
1100 conflicts_count,
1101 duplicates_count,
1102 suggestions_summary,
1103 generated_at: Utc::now(),
1104 })
1105}
1106
1107pub fn get_source_trust(
1113 conn: &Connection,
1114 source_type: &str,
1115 source_identifier: Option<&str>,
1116) -> Result<SourceTrustScore> {
1117 let identifier = source_identifier.unwrap_or("default");
1118
1119 let result = conn.query_row(
1120 r#"
1121 SELECT source_type, source_identifier, trust_score, verification_count, notes
1122 FROM source_trust_scores
1123 WHERE source_type = ? AND (source_identifier = ? OR source_identifier IS NULL)
1124 ORDER BY source_identifier DESC
1125 LIMIT 1
1126 "#,
1127 params![source_type, identifier],
1128 |row| {
1129 Ok(SourceTrustScore {
1130 source_type: row.get(0)?,
1131 source_identifier: row.get(1)?,
1132 trust_score: row.get(2)?,
1133 verification_count: row.get(3)?,
1134 notes: row.get(4)?,
1135 })
1136 },
1137 );
1138
1139 result.map_err(|_| EngramError::NotFound(0))
1140}
1141
1142pub fn update_source_trust(
1144 conn: &Connection,
1145 source_type: &str,
1146 source_identifier: Option<&str>,
1147 trust_score: f32,
1148 notes: Option<&str>,
1149) -> Result<()> {
1150 let now = Utc::now().to_rfc3339();
1151
1152 conn.execute(
1153 r#"
1154 INSERT INTO source_trust_scores (source_type, source_identifier, trust_score, notes, updated_at)
1155 VALUES (?, ?, ?, ?, ?)
1156 ON CONFLICT(source_type, source_identifier)
1157 DO UPDATE SET trust_score = ?, notes = ?, updated_at = ?
1158 "#,
1159 params![
1160 source_type,
1161 source_identifier,
1162 trust_score,
1163 notes,
1164 now,
1165 trust_score,
1166 notes,
1167 now
1168 ],
1169 )?;
1170
1171 Ok(())
1172}
1173
#[cfg(test)]
mod tests {
    use super::*;

    /// Near-identical sentences score high, unrelated ones score low.
    #[test]
    fn test_text_similarity() {
        let base = "The quick brown fox jumps over the lazy dog";
        let near = "The quick brown fox jumps over the lazy cat";
        let unrelated = "Something completely different";

        assert!(
            calculate_text_similarity(base, near) > 0.8,
            "Similar texts should have high similarity"
        );
        assert!(
            calculate_text_similarity(base, unrelated) < 0.3,
            "Different texts should have low similarity"
        );
    }

    /// Identical vectors give 1, orthogonal vectors give 0.
    #[test]
    fn test_cosine_similarity() {
        let x_axis = vec![1.0, 0.0, 0.0];
        let x_axis_copy = vec![1.0, 0.0, 0.0];
        let y_axis = vec![0.0, 1.0, 0.0];

        let same = cosine_similarity(&x_axis, &x_axis_copy);
        let orthogonal = cosine_similarity(&x_axis, &y_axis);

        assert!((same - 1.0).abs() < 0.001);
        assert!(orthogonal.abs() < 0.001);
    }

    /// Stored identifiers parse back to their variants.
    #[test]
    fn test_conflict_type_parsing() {
        let cases = [
            ("contradiction", ConflictType::Contradiction),
            ("duplicate", ConflictType::Duplicate),
        ];

        for (raw, expected) in cases {
            assert_eq!(raw.parse::<ConflictType>().unwrap(), expected);
        }
    }
}