engram/intelligence/
context_quality.rs

1//! Context Quality Module (Phase 9: ENG-48 to ENG-66)
2//!
3//! Provides:
4//! - Near-duplicate detection (ENG-48)
5//! - Semantic deduplication (ENG-49)
6//! - Conflict detection (ENG-50)
7//! - Contradiction resolution (ENG-51)
8//! - Enhanced quality scoring (ENG-52)
9//! - Source credibility (ENG-53)
10//! - Quality improvement suggestions (ENG-57)
11
12use chrono::{DateTime, Utc};
13use rusqlite::{params, Connection};
14use serde::{Deserialize, Serialize};
15use std::collections::{HashMap, HashSet};
16
17use crate::error::{EngramError, Result};
18use crate::storage::queries::get_memory;
19use crate::types::{Memory, MemoryId};
20
21// ============================================================================
22// Types and Enums
23// ============================================================================
24
/// Type of conflict between memories.
///
/// Serialized by serde in `snake_case` (for example `semantic_overlap`), which
/// is the same spelling `ConflictType::as_str` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConflictType {
    /// Direct contradiction in facts
    Contradiction,
    /// Outdated information
    Staleness,
    /// Duplicate content
    Duplicate,
    /// Semantic overlap
    SemanticOverlap,
    /// Inconsistent metadata
    MetadataInconsistency,
}
40
41impl ConflictType {
42    pub fn as_str(&self) -> &'static str {
43        match self {
44            ConflictType::Contradiction => "contradiction",
45            ConflictType::Staleness => "staleness",
46            ConflictType::Duplicate => "duplicate",
47            ConflictType::SemanticOverlap => "semantic_overlap",
48            ConflictType::MetadataInconsistency => "metadata_inconsistency",
49        }
50    }
51}
52
53impl std::str::FromStr for ConflictType {
54    type Err = EngramError;
55
56    fn from_str(s: &str) -> Result<Self> {
57        match s.to_lowercase().as_str() {
58            "contradiction" => Ok(ConflictType::Contradiction),
59            "staleness" => Ok(ConflictType::Staleness),
60            "duplicate" => Ok(ConflictType::Duplicate),
61            "semantic_overlap" => Ok(ConflictType::SemanticOverlap),
62            "metadata_inconsistency" => Ok(ConflictType::MetadataInconsistency),
63            _ => Err(EngramError::InvalidInput(format!(
64                "Unknown conflict type: {}",
65                s
66            ))),
67        }
68    }
69}
70
/// Severity of a conflict.
///
/// Serialized by serde in `snake_case`; `ConflictSeverity::as_str` returns the
/// same lowercase names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConflictSeverity {
    /// Lowest severity level.
    Low,
    /// Middle severity level; also what unrecognized strings parse to.
    Medium,
    /// Second-highest severity level.
    High,
    /// Highest severity level.
    Critical,
}
80
81impl ConflictSeverity {
82    pub fn as_str(&self) -> &'static str {
83        match self {
84            ConflictSeverity::Low => "low",
85            ConflictSeverity::Medium => "medium",
86            ConflictSeverity::High => "high",
87            ConflictSeverity::Critical => "critical",
88        }
89    }
90}
91
92impl std::str::FromStr for ConflictSeverity {
93    type Err = EngramError;
94
95    fn from_str(s: &str) -> Result<Self> {
96        match s.to_lowercase().as_str() {
97            "low" => Ok(ConflictSeverity::Low),
98            "medium" => Ok(ConflictSeverity::Medium),
99            "high" => Ok(ConflictSeverity::High),
100            "critical" => Ok(ConflictSeverity::Critical),
101            _ => Ok(ConflictSeverity::Medium),
102        }
103    }
104}
105
/// Resolution type for conflicts.
///
/// Serialized by serde in `snake_case`; `ResolutionType::as_str` returns the
/// same identifiers (for example `keep_a`, `false_positive`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResolutionType {
    /// Keep memory A, archive B
    KeepA,
    /// Keep memory B, archive A
    KeepB,
    /// Merge both into new memory (`resolve_conflict` takes no automatic action for this)
    Merge,
    /// Keep both as-is (mark as reviewed)
    KeepBoth,
    /// Delete both (`resolve_conflict` sets `deleted_at` on both memories)
    DeleteBoth,
    /// Mark as false positive
    FalsePositive,
}
123
124impl ResolutionType {
125    pub fn as_str(&self) -> &'static str {
126        match self {
127            ResolutionType::KeepA => "keep_a",
128            ResolutionType::KeepB => "keep_b",
129            ResolutionType::Merge => "merge",
130            ResolutionType::KeepBoth => "keep_both",
131            ResolutionType::DeleteBoth => "delete_both",
132            ResolutionType::FalsePositive => "false_positive",
133        }
134    }
135}
136
/// Validation status for memories.
///
/// Serialized by serde in `snake_case`; `ValidationStatus::as_str` returns the
/// same lowercase names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ValidationStatus {
    Unverified,
    Verified,
    Disputed,
    Stale,
}
146
147impl ValidationStatus {
148    pub fn as_str(&self) -> &'static str {
149        match self {
150            ValidationStatus::Unverified => "unverified",
151            ValidationStatus::Verified => "verified",
152            ValidationStatus::Disputed => "disputed",
153            ValidationStatus::Stale => "stale",
154        }
155    }
156}
157
158// ============================================================================
159// Data Structures
160// ============================================================================
161
/// A detected conflict between two memories.
///
/// Mirrors a row of the `memory_conflicts` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryConflict {
    /// Identifier of the conflict record.
    pub id: i64,
    /// First memory of the conflicting pair.
    pub memory_a_id: MemoryId,
    /// Second memory of the conflicting pair.
    pub memory_b_id: MemoryId,
    /// What kind of conflict was found.
    pub conflict_type: ConflictType,
    /// How serious the conflict is.
    pub severity: ConflictSeverity,
    /// Optional human-readable explanation (e.g. the measured similarity).
    pub description: Option<String>,
    /// When the conflict was recorded.
    pub detected_at: DateTime<Utc>,
    /// When the conflict was resolved; `None` while it is still open.
    pub resolved_at: Option<DateTime<Utc>>,
    /// How the conflict was resolved, if it has been.
    pub resolution_type: Option<ResolutionType>,
    /// Free-form notes supplied at resolution time.
    pub resolution_notes: Option<String>,
    /// `true` when the conflict was produced by automatic detection.
    pub auto_detected: bool,
}
177
/// A duplicate candidate pair.
///
/// Mirrors a row of the `duplicate_candidates` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateCandidate {
    /// Identifier of the candidate record.
    pub id: i64,
    /// First memory of the pair.
    pub memory_a_id: MemoryId,
    /// Second memory of the pair.
    pub memory_b_id: MemoryId,
    /// Similarity between the two memories; higher means more alike.
    pub similarity_score: f32,
    /// How the similarity was measured; this module produces `"content"` and `"semantic"`.
    pub similarity_type: String,
    /// When the pair was detected.
    pub detected_at: DateTime<Utc>,
    /// Review status; newly detected pairs are `"pending"`.
    pub status: String,
}
189
/// Enhanced quality score with all components.
///
/// Produced by `calculate_quality_score`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedQualityScore {
    /// Weighted sum of the component scores below.
    pub overall: f32,
    /// Letter grade derived from `overall`: `'A'`, `'B'`, `'C'`, `'D'` or `'F'`.
    pub grade: char,
    /// Clarity component.
    pub clarity: f32,
    /// Completeness component.
    pub completeness: f32,
    /// Freshness component.
    pub freshness: f32,
    /// Consistency component (lower when unresolved conflicts exist).
    pub consistency: f32,
    /// Trust score of the memory's source.
    pub source_trust: f32,
    /// Improvement suggestions derived from the component scores.
    pub suggestions: Vec<QualitySuggestion>,
    /// When the score was computed.
    pub calculated_at: DateTime<Utc>,
}
203
/// A quality improvement suggestion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualitySuggestion {
    /// Aspect the suggestion addresses, e.g. `"completeness"` or `"clarity"`.
    pub category: String,
    /// Urgency label; this module emits `"high"`, `"medium"` and `"low"`.
    pub priority: String,
    /// Human-readable advice.
    pub message: String,
    /// Optional machine-readable action hint, e.g. `"expand"` or `"add_tags"`.
    pub action: Option<String>,
}
212
/// Source trust score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceTrustScore {
    /// Kind of source; quality scoring matches it against a memory's `origin` metadata.
    pub source_type: String,
    /// Optional identifier narrowing the source within its type.
    pub source_identifier: Option<String>,
    /// Trust assigned to the source.
    pub trust_score: f32,
    /// Number of verifications recorded for this source.
    pub verification_count: i32,
    /// Optional free-form notes.
    pub notes: Option<String>,
}
222
/// Quality report for a workspace or set of memories.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityReport {
    /// Number of memories covered by the report.
    pub total_memories: i64,
    /// Mean quality score of those memories.
    pub average_quality: f32,
    /// Memory count per grade, keyed by a `char`
    /// (presumably the letter grades used by `EnhancedQualityScore` — TODO confirm).
    pub quality_distribution: HashMap<char, i64>,
    /// Most significant issues found.
    pub top_issues: Vec<QualityIssue>,
    /// Number of conflicts counted for the report.
    pub conflicts_count: i64,
    /// Number of duplicate candidates counted for the report.
    pub duplicates_count: i64,
    /// Short textual summary of suggested improvements.
    pub suggestions_summary: Vec<String>,
    /// When the report was generated.
    pub generated_at: DateTime<Utc>,
}
235
/// A quality issue in the report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityIssue {
    /// Kind of issue being reported.
    pub issue_type: String,
    /// How many memories (or records) are affected.
    pub count: i64,
    /// Severity label for the issue.
    pub severity: String,
    /// Human-readable description of the issue.
    pub description: String,
}
244
245// ============================================================================
246// Configuration
247// ============================================================================
248
/// Configuration for context quality analysis.
///
/// The five `*_weight` fields are multiplied with the matching component
/// scores and summed to form the overall quality score; the defaults add up
/// to 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextQualityConfig {
    /// Weight for clarity in quality score
    pub clarity_weight: f32,
    /// Weight for completeness
    pub completeness_weight: f32,
    /// Weight for freshness
    pub freshness_weight: f32,
    /// Weight for consistency
    pub consistency_weight: f32,
    /// Weight for source trust
    pub source_trust_weight: f32,
    /// Threshold for near-duplicate detection (0-1); similarity at or above
    /// this value is reported as a duplicate conflict
    pub duplicate_threshold: f32,
    /// Threshold for semantic similarity (0-1); similarity at or above this
    /// value (but below `duplicate_threshold`) is reported as semantic overlap
    pub semantic_threshold: f32,
    /// Days until memory is considered stale
    pub staleness_days: i64,
    /// Minimum content length for quality, compared against the content's byte length
    pub min_content_length: usize,
    /// Ideal content length, compared against the content's byte length;
    /// content at least this long scores full completeness
    pub ideal_content_length: usize,
}
273
impl Default for ContextQualityConfig {
    /// Returns the built-in defaults for quality analysis.
    fn default() -> Self {
        Self {
            // The five weights below add up to 1.0.
            clarity_weight: 0.25,
            completeness_weight: 0.20,
            freshness_weight: 0.20,
            consistency_weight: 0.20,
            source_trust_weight: 0.15,
            // Similarity cut-offs on a 0-1 scale.
            duplicate_threshold: 0.85,
            semantic_threshold: 0.80,
            // Age, in days, after which a memory counts as stale.
            staleness_days: 90,
            // Content length bounds used by completeness scoring.
            min_content_length: 20,
            ideal_content_length: 200,
        }
    }
}
290
291// ============================================================================
292// Near-Duplicate Detection (ENG-48)
293// ============================================================================
294
/// Calculate similarity between two strings using character n-grams.
///
/// Both inputs are lowercased and stripped of all whitespace, split into
/// overlapping character trigrams, and compared with the Jaccard index
/// (`|A ∩ B| / |A ∪ B|`).
///
/// Inputs too short to yield a single trigram are compared by exact equality
/// of their normalized form, so two *different* short strings score `0.0`
/// rather than being reported as identical.
///
/// Returns a value in `[0.0, 1.0]`; `1.0` means the trigram sets (or the short
/// normalized strings) are identical.
pub fn calculate_text_similarity(text_a: &str, text_b: &str) -> f32 {
    const NGRAM_SIZE: usize = 3;

    /// Lowercases `text` and drops every whitespace character.
    fn normalize(text: &str) -> Vec<char> {
        text.to_lowercase()
            .chars()
            .filter(|c| !c.is_whitespace())
            .collect()
    }

    /// Collects the distinct n-grams as borrowed windows (no per-gram allocation).
    fn ngrams(chars: &[char]) -> HashSet<&[char]> {
        // `windows` yields nothing when the input is shorter than the window.
        chars.windows(NGRAM_SIZE).collect()
    }

    let chars_a = normalize(text_a);
    let chars_b = normalize(text_b);
    let grams_a = ngrams(&chars_a);
    let grams_b = ngrams(&chars_b);

    if grams_a.is_empty() || grams_b.is_empty() {
        // At least one side is too short for trigram comparison. Only treat
        // the pair as identical when both are short AND literally equal.
        let identical = grams_a.is_empty() && grams_b.is_empty() && chars_a == chars_b;
        return if identical { 1.0 } else { 0.0 };
    }

    let shared = grams_a.intersection(&grams_b).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let total = grams_a.len() + grams_b.len() - shared;

    shared as f32 / total as f32
}
331
332/// Find near-duplicate memories using text similarity
333pub fn find_near_duplicates(
334    conn: &Connection,
335    threshold: f32,
336    limit: i64,
337) -> Result<Vec<DuplicateCandidate>> {
338    // Get memories that haven't been checked yet
339    let mut stmt = conn.prepare(
340        r#"
341        SELECT id, content FROM memories
342        WHERE deleted_at IS NULL
343        ORDER BY created_at DESC
344        LIMIT ?
345        "#,
346    )?;
347
348    let memories: Vec<(i64, String)> = stmt
349        .query_map(params![limit * 2], |row| Ok((row.get(0)?, row.get(1)?)))?
350        .filter_map(|r| r.ok())
351        .collect();
352
353    let mut duplicates = Vec::new();
354
355    // Compare pairs
356    for i in 0..memories.len() {
357        for j in (i + 1)..memories.len() {
358            let (id_a, content_a) = &memories[i];
359            let (id_b, content_b) = &memories[j];
360
361            let similarity = calculate_text_similarity(content_a, content_b);
362
363            if similarity >= threshold {
364                // Check if already recorded
365                let exists: bool = conn.query_row(
366                    "SELECT 1 FROM duplicate_candidates WHERE memory_a_id = ? AND memory_b_id = ?",
367                    params![id_a, id_b],
368                    |_| Ok(true),
369                ).unwrap_or(false);
370
371                if !exists {
372                    conn.execute(
373                        r#"
374                        INSERT OR IGNORE INTO duplicate_candidates
375                        (memory_a_id, memory_b_id, similarity_score, similarity_type)
376                        VALUES (?, ?, ?, 'content')
377                        "#,
378                        params![id_a, id_b, similarity],
379                    )?;
380
381                    duplicates.push(DuplicateCandidate {
382                        id: 0,
383                        memory_a_id: *id_a,
384                        memory_b_id: *id_b,
385                        similarity_score: similarity,
386                        similarity_type: "content".to_string(),
387                        detected_at: Utc::now(),
388                        status: "pending".to_string(),
389                    });
390                }
391            }
392        }
393    }
394
395    Ok(duplicates)
396}
397
398/// Get pending duplicate candidates
399pub fn get_pending_duplicates(conn: &Connection, limit: i64) -> Result<Vec<DuplicateCandidate>> {
400    let mut stmt = conn.prepare(
401        r#"
402        SELECT id, memory_a_id, memory_b_id, similarity_score, similarity_type, detected_at, status
403        FROM duplicate_candidates
404        WHERE status = 'pending'
405        ORDER BY similarity_score DESC
406        LIMIT ?
407        "#,
408    )?;
409
410    let duplicates = stmt
411        .query_map(params![limit], |row| {
412            Ok(DuplicateCandidate {
413                id: row.get(0)?,
414                memory_a_id: row.get(1)?,
415                memory_b_id: row.get(2)?,
416                similarity_score: row.get(3)?,
417                similarity_type: row.get(4)?,
418                detected_at: row
419                    .get::<_, String>(5)?
420                    .parse()
421                    .unwrap_or_else(|_| Utc::now()),
422                status: row.get(6)?,
423            })
424        })?
425        .filter_map(|r| r.ok())
426        .collect();
427
428    Ok(duplicates)
429}
430
431// ============================================================================
432// Semantic Deduplication (ENG-49)
433// ============================================================================
434
435/// Find semantic duplicates using embedding similarity
436pub fn find_semantic_duplicates(
437    conn: &Connection,
438    query_embedding: &[f32],
439    threshold: f32,
440    limit: i64,
441) -> Result<Vec<DuplicateCandidate>> {
442    // Use existing embedding search infrastructure
443    let mut stmt = conn.prepare(
444        r#"
445        SELECT m.id, e.embedding
446        FROM memories m
447        JOIN embeddings e ON m.id = e.memory_id
448        WHERE m.deleted_at IS NULL
449        LIMIT ?
450        "#,
451    )?;
452
453    let memories: Vec<(i64, Vec<f32>)> = stmt
454        .query_map(params![limit], |row| {
455            let id: i64 = row.get(0)?;
456            let embedding_blob: Vec<u8> = row.get(1)?;
457            let embedding: Vec<f32> = embedding_blob
458                .chunks(4)
459                .map(|chunk| {
460                    let bytes: [u8; 4] = chunk.try_into().unwrap_or([0; 4]);
461                    f32::from_le_bytes(bytes)
462                })
463                .collect();
464            Ok((id, embedding))
465        })?
466        .filter_map(|r| r.ok())
467        .collect();
468
469    let mut duplicates = Vec::new();
470
471    for (id, embedding) in &memories {
472        let similarity = cosine_similarity(query_embedding, embedding);
473        if similarity >= threshold {
474            duplicates.push(DuplicateCandidate {
475                id: 0,
476                memory_a_id: 0, // Query memory
477                memory_b_id: *id,
478                similarity_score: similarity,
479                similarity_type: "semantic".to_string(),
480                detected_at: Utc::now(),
481                status: "pending".to_string(),
482            });
483        }
484    }
485
486    Ok(duplicates)
487}
488
/// Calculate cosine similarity between two vectors.
///
/// Returns `0.0` when the vectors are empty, differ in length, or when either
/// has zero magnitude; otherwise returns `dot(a, b) / (|a| * |b|)`.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }

    // Euclidean norm of a vector.
    let norm = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    let (norm_a, norm_b) = (norm(a), norm(b));

    // A zero vector has no direction, so similarity is defined as 0.
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    dot / (norm_a * norm_b)
}
505
506// ============================================================================
507// Conflict Detection (ENG-50)
508// ============================================================================
509
510/// Detect conflicts for a memory against existing memories
511pub fn detect_conflicts(
512    conn: &Connection,
513    memory_id: MemoryId,
514    config: &ContextQualityConfig,
515) -> Result<Vec<MemoryConflict>> {
516    let memory = get_memory(conn, memory_id)?;
517    let mut conflicts = Vec::new();
518
519    // Find memories with similar tags or content that might conflict
520    let mut stmt = conn.prepare(
521        r#"
522        SELECT id, content, tags, updated_at
523        FROM memories
524        WHERE id != ? AND deleted_at IS NULL
525        AND (
526            -- Same workspace
527            workspace = (SELECT workspace FROM memories WHERE id = ?)
528            -- Or overlapping tags
529            OR EXISTS (
530                SELECT 1 FROM json_each(tags) t1
531                WHERE t1.value IN (SELECT value FROM json_each((SELECT tags FROM memories WHERE id = ?)))
532            )
533        )
534        LIMIT 100
535        "#,
536    )?;
537
538    let candidates: Vec<(i64, String, String, String)> = stmt
539        .query_map(params![memory_id, memory_id, memory_id], |row| {
540            Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
541        })?
542        .filter_map(|r| r.ok())
543        .collect();
544
545    for (other_id, other_content, _other_tags, other_updated) in candidates {
546        // Check for staleness conflict
547        let memory_date: DateTime<Utc> = memory.updated_at;
548        let other_date: DateTime<Utc> = other_updated.parse().unwrap_or(memory_date);
549        let days_diff = (memory_date - other_date).num_days().abs();
550
551        if days_diff > config.staleness_days {
552            // Check content similarity to see if they're about the same topic
553            let similarity = calculate_text_similarity(&memory.content, &other_content);
554            if similarity > 0.3 {
555                let conflict = create_conflict(
556                    conn,
557                    memory_id,
558                    other_id,
559                    ConflictType::Staleness,
560                    ConflictSeverity::Medium,
561                    Some(format!(
562                        "Memories differ by {} days and have {:.0}% content similarity",
563                        days_diff,
564                        similarity * 100.0
565                    )),
566                )?;
567                conflicts.push(conflict);
568            }
569        }
570
571        // Check for duplicate/overlap
572        let similarity = calculate_text_similarity(&memory.content, &other_content);
573        if similarity >= config.duplicate_threshold {
574            let conflict = create_conflict(
575                conn,
576                memory_id,
577                other_id,
578                ConflictType::Duplicate,
579                ConflictSeverity::High,
580                Some(format!("Content similarity: {:.0}%", similarity * 100.0)),
581            )?;
582            conflicts.push(conflict);
583        } else if similarity >= config.semantic_threshold {
584            let conflict = create_conflict(
585                conn,
586                memory_id,
587                other_id,
588                ConflictType::SemanticOverlap,
589                ConflictSeverity::Low,
590                Some(format!("Semantic overlap: {:.0}%", similarity * 100.0)),
591            )?;
592            conflicts.push(conflict);
593        }
594    }
595
596    Ok(conflicts)
597}
598
599/// Create a conflict record
600fn create_conflict(
601    conn: &Connection,
602    memory_a_id: MemoryId,
603    memory_b_id: MemoryId,
604    conflict_type: ConflictType,
605    severity: ConflictSeverity,
606    description: Option<String>,
607) -> Result<MemoryConflict> {
608    let now = Utc::now();
609    let now_str = now.to_rfc3339();
610
611    conn.execute(
612        r#"
613        INSERT OR IGNORE INTO memory_conflicts
614        (memory_a_id, memory_b_id, conflict_type, severity, description, detected_at)
615        VALUES (?, ?, ?, ?, ?, ?)
616        "#,
617        params![
618            memory_a_id,
619            memory_b_id,
620            conflict_type.as_str(),
621            severity.as_str(),
622            description,
623            now_str
624        ],
625    )?;
626
627    let id = conn.last_insert_rowid();
628
629    Ok(MemoryConflict {
630        id,
631        memory_a_id,
632        memory_b_id,
633        conflict_type,
634        severity,
635        description,
636        detected_at: now,
637        resolved_at: None,
638        resolution_type: None,
639        resolution_notes: None,
640        auto_detected: true,
641    })
642}
643
644/// Get unresolved conflicts
645pub fn get_unresolved_conflicts(conn: &Connection, limit: i64) -> Result<Vec<MemoryConflict>> {
646    let mut stmt = conn.prepare(
647        r#"
648        SELECT id, memory_a_id, memory_b_id, conflict_type, severity, description,
649               detected_at, resolved_at, resolution_type, resolution_notes, auto_detected
650        FROM memory_conflicts
651        WHERE resolved_at IS NULL
652        ORDER BY
653            CASE severity
654                WHEN 'critical' THEN 1
655                WHEN 'high' THEN 2
656                WHEN 'medium' THEN 3
657                ELSE 4
658            END,
659            detected_at DESC
660        LIMIT ?
661        "#,
662    )?;
663
664    let conflicts = stmt
665        .query_map(params![limit], |row| {
666            Ok(MemoryConflict {
667                id: row.get(0)?,
668                memory_a_id: row.get(1)?,
669                memory_b_id: row.get(2)?,
670                conflict_type: row
671                    .get::<_, String>(3)?
672                    .parse()
673                    .unwrap_or(ConflictType::Contradiction),
674                severity: row
675                    .get::<_, String>(4)?
676                    .parse()
677                    .unwrap_or(ConflictSeverity::Medium),
678                description: row.get(5)?,
679                detected_at: row
680                    .get::<_, String>(6)?
681                    .parse()
682                    .unwrap_or_else(|_| Utc::now()),
683                resolved_at: row
684                    .get::<_, Option<String>>(7)?
685                    .and_then(|s| s.parse().ok()),
686                resolution_type: None,
687                resolution_notes: row.get(9)?,
688                auto_detected: row.get::<_, i32>(10)? == 1,
689            })
690        })?
691        .filter_map(|r| r.ok())
692        .collect();
693
694    Ok(conflicts)
695}
696
697// ============================================================================
698// Contradiction Resolution (ENG-51)
699// ============================================================================
700
701/// Resolve a conflict between memories
702pub fn resolve_conflict(
703    conn: &Connection,
704    conflict_id: i64,
705    resolution_type: ResolutionType,
706    notes: Option<&str>,
707) -> Result<()> {
708    let now = Utc::now().to_rfc3339();
709
710    conn.execute(
711        r#"
712        UPDATE memory_conflicts
713        SET resolved_at = ?, resolution_type = ?, resolution_notes = ?
714        WHERE id = ?
715        "#,
716        params![now, resolution_type.as_str(), notes, conflict_id],
717    )?;
718
719    // Apply resolution
720    let (memory_a_id, memory_b_id): (i64, i64) = conn.query_row(
721        "SELECT memory_a_id, memory_b_id FROM memory_conflicts WHERE id = ?",
722        params![conflict_id],
723        |row| Ok((row.get(0)?, row.get(1)?)),
724    )?;
725
726    match resolution_type {
727        ResolutionType::KeepA => {
728            // Archive memory B
729            conn.execute(
730                "UPDATE memories SET lifecycle_state = 'archived' WHERE id = ?",
731                params![memory_b_id],
732            )?;
733        }
734        ResolutionType::KeepB => {
735            // Archive memory A
736            conn.execute(
737                "UPDATE memories SET lifecycle_state = 'archived' WHERE id = ?",
738                params![memory_a_id],
739            )?;
740        }
741        ResolutionType::DeleteBoth => {
742            let now = Utc::now().to_rfc3339();
743            conn.execute(
744                "UPDATE memories SET deleted_at = ? WHERE id IN (?, ?)",
745                params![now, memory_a_id, memory_b_id],
746            )?;
747        }
748        _ => {
749            // KeepBoth, Merge, FalsePositive - no automatic action
750        }
751    }
752
753    Ok(())
754}
755
756// ============================================================================
757// Enhanced Quality Scoring (ENG-52)
758// ============================================================================
759
760/// Calculate enhanced quality score for a memory
761pub fn calculate_quality_score(
762    conn: &Connection,
763    memory_id: MemoryId,
764    config: &ContextQualityConfig,
765) -> Result<EnhancedQualityScore> {
766    let memory = get_memory(conn, memory_id)?;
767
768    let clarity = score_clarity(&memory);
769    let completeness = score_completeness(&memory, config);
770    let freshness = score_freshness(&memory, config);
771    let consistency = score_consistency(conn, memory_id)?;
772    let source_trust = get_source_trust_for_memory(conn, &memory)?;
773
774    let overall = clarity * config.clarity_weight
775        + completeness * config.completeness_weight
776        + freshness * config.freshness_weight
777        + consistency * config.consistency_weight
778        + source_trust * config.source_trust_weight;
779
780    let grade = match overall {
781        s if s >= 0.9 => 'A',
782        s if s >= 0.8 => 'B',
783        s if s >= 0.7 => 'C',
784        s if s >= 0.6 => 'D',
785        _ => 'F',
786    };
787
788    let suggestions = generate_quality_suggestions(
789        &memory,
790        clarity,
791        completeness,
792        freshness,
793        consistency,
794        source_trust,
795    );
796
797    // Record in history
798    let now = Utc::now();
799    conn.execute(
800        r#"
801        INSERT INTO quality_history
802        (memory_id, quality_score, clarity_score, completeness_score, freshness_score, consistency_score, source_trust_score)
803        VALUES (?, ?, ?, ?, ?, ?, ?)
804        "#,
805        params![memory_id, overall, clarity, completeness, freshness, consistency, source_trust],
806    )?;
807
808    // Update memory quality score
809    conn.execute(
810        "UPDATE memories SET quality_score = ? WHERE id = ?",
811        params![overall, memory_id],
812    )?;
813
814    Ok(EnhancedQualityScore {
815        overall,
816        grade,
817        clarity,
818        completeness,
819        freshness,
820        consistency,
821        source_trust,
822        suggestions,
823        calculated_at: now,
824    })
825}
826
827fn score_clarity(memory: &Memory) -> f32 {
828    let content = &memory.content;
829    let mut score: f32 = 0.5;
830
831    // Sentence structure
832    let sentence_count =
833        content.matches('.').count() + content.matches('!').count() + content.matches('?').count();
834    if sentence_count > 0 {
835        score += 0.15;
836    }
837
838    // Word clarity
839    let word_count = content.split_whitespace().count();
840    if word_count > 0 {
841        let avg_word_len: f32 = content
842            .split_whitespace()
843            .map(|w| w.len() as f32)
844            .sum::<f32>()
845            / word_count as f32;
846
847        if (3.0..=10.0).contains(&avg_word_len) {
848            score += 0.2;
849        }
850    }
851
852    // Has organization (tags)
853    if !memory.tags.is_empty() {
854        score += 0.15;
855    }
856
857    score.min(1.0)
858}
859
860fn score_completeness(memory: &Memory, config: &ContextQualityConfig) -> f32 {
861    let len = memory.content.len();
862
863    if len < config.min_content_length {
864        return 0.3;
865    }
866
867    if len >= config.ideal_content_length {
868        return 1.0;
869    }
870
871    let range = (config.ideal_content_length - config.min_content_length) as f32;
872    let progress = (len - config.min_content_length) as f32;
873    0.3 + 0.7 * (progress / range)
874}
875
876fn score_freshness(memory: &Memory, config: &ContextQualityConfig) -> f32 {
877    let age_days = (Utc::now() - memory.updated_at).num_days() as f32;
878    let staleness = config.staleness_days as f32;
879
880    if age_days <= 0.0 {
881        1.0
882    } else if age_days >= staleness {
883        0.2
884    } else {
885        1.0 - 0.8 * (age_days / staleness)
886    }
887}
888
889fn score_consistency(conn: &Connection, memory_id: MemoryId) -> Result<f32> {
890    // Check for unresolved conflicts
891    let conflict_count: i64 = conn.query_row(
892        r#"
893        SELECT COUNT(*) FROM memory_conflicts
894        WHERE (memory_a_id = ? OR memory_b_id = ?) AND resolved_at IS NULL
895        "#,
896        params![memory_id, memory_id],
897        |row| row.get(0),
898    )?;
899
900    Ok(match conflict_count {
901        0 => 1.0,
902        1 => 0.7,
903        2 => 0.5,
904        _ => 0.3,
905    })
906}
907
908fn get_source_trust_for_memory(conn: &Connection, memory: &Memory) -> Result<f32> {
909    // Determine source type from metadata
910    let source_type = memory
911        .metadata
912        .get("origin")
913        .and_then(|v| v.as_str())
914        .unwrap_or("user");
915
916    let trust_score: f32 = conn
917        .query_row(
918            "SELECT trust_score FROM source_trust_scores WHERE source_type = ?",
919            params![source_type],
920            |row| row.get(0),
921        )
922        .unwrap_or(0.7);
923
924    Ok(trust_score)
925}
926
927fn generate_quality_suggestions(
928    memory: &Memory,
929    clarity: f32,
930    completeness: f32,
931    freshness: f32,
932    consistency: f32,
933    _source_trust: f32,
934) -> Vec<QualitySuggestion> {
935    let mut suggestions = Vec::new();
936
937    if completeness < 0.5 {
938        suggestions.push(QualitySuggestion {
939            category: "completeness".to_string(),
940            priority: "high".to_string(),
941            message: "Add more detail to make this memory more useful".to_string(),
942            action: Some("expand".to_string()),
943        });
944    }
945
946    if clarity < 0.5 {
947        suggestions.push(QualitySuggestion {
948            category: "clarity".to_string(),
949            priority: "medium".to_string(),
950            message: "Consider adding structure with clear sentences".to_string(),
951            action: Some("restructure".to_string()),
952        });
953    }
954
955    if memory.tags.is_empty() {
956        suggestions.push(QualitySuggestion {
957            category: "organization".to_string(),
958            priority: "low".to_string(),
959            message: "Add tags to improve organization and searchability".to_string(),
960            action: Some("add_tags".to_string()),
961        });
962    }
963
964    if freshness < 0.3 {
965        suggestions.push(QualitySuggestion {
966            category: "freshness".to_string(),
967            priority: "medium".to_string(),
968            message: "This memory may be outdated - consider reviewing".to_string(),
969            action: Some("review".to_string()),
970        });
971    }
972
973    if consistency < 0.5 {
974        suggestions.push(QualitySuggestion {
975            category: "consistency".to_string(),
976            priority: "high".to_string(),
977            message: "This memory has unresolved conflicts - review and resolve".to_string(),
978            action: Some("resolve_conflicts".to_string()),
979        });
980    }
981
982    suggestions
983}
984
985// ============================================================================
986// Quality Report (ENG-64)
987// ============================================================================
988
989/// Generate a quality report for a workspace
990pub fn generate_quality_report(
991    conn: &Connection,
992    workspace: Option<&str>,
993) -> Result<QualityReport> {
994    let workspace_filter = workspace.unwrap_or("default");
995
996    // Total memories
997    let total_memories: i64 = conn.query_row(
998        "SELECT COUNT(*) FROM memories WHERE workspace = ? AND deleted_at IS NULL",
999        params![workspace_filter],
1000        |row| row.get(0),
1001    )?;
1002
1003    // Average quality
1004    let average_quality: f32 = conn
1005        .query_row(
1006            "SELECT COALESCE(AVG(quality_score), 0.5) FROM memories WHERE workspace = ? AND deleted_at IS NULL",
1007            params![workspace_filter],
1008            |row| row.get(0),
1009        )
1010        .unwrap_or(0.5);
1011
1012    // Quality distribution
1013    let mut distribution = HashMap::new();
1014    let grades = ['A', 'B', 'C', 'D', 'F'];
1015    for grade in grades {
1016        let (min, max) = match grade {
1017            'A' => (0.9, 1.1),
1018            'B' => (0.8, 0.9),
1019            'C' => (0.7, 0.8),
1020            'D' => (0.6, 0.7),
1021            _ => (0.0, 0.6),
1022        };
1023        let count: i64 = conn.query_row(
1024            "SELECT COUNT(*) FROM memories WHERE workspace = ? AND deleted_at IS NULL AND quality_score >= ? AND quality_score < ?",
1025            params![workspace_filter, min, max],
1026            |row| row.get(0),
1027        ).unwrap_or(0);
1028        distribution.insert(grade, count);
1029    }
1030
1031    // Conflicts count
1032    let conflicts_count: i64 = conn.query_row(
1033        "SELECT COUNT(*) FROM memory_conflicts WHERE resolved_at IS NULL",
1034        [],
1035        |row| row.get(0),
1036    )?;
1037
1038    // Duplicates count
1039    let duplicates_count: i64 = conn.query_row(
1040        "SELECT COUNT(*) FROM duplicate_candidates WHERE status = 'pending'",
1041        [],
1042        |row| row.get(0),
1043    )?;
1044
1045    // Top issues
1046    let mut top_issues = Vec::new();
1047
1048    if conflicts_count > 0 {
1049        top_issues.push(QualityIssue {
1050            issue_type: "conflicts".to_string(),
1051            count: conflicts_count,
1052            severity: "high".to_string(),
1053            description: format!("{} unresolved conflicts detected", conflicts_count),
1054        });
1055    }
1056
1057    if duplicates_count > 0 {
1058        top_issues.push(QualityIssue {
1059            issue_type: "duplicates".to_string(),
1060            count: duplicates_count,
1061            severity: "medium".to_string(),
1062            description: format!("{} potential duplicates found", duplicates_count),
1063        });
1064    }
1065
1066    // Low quality count
1067    let low_quality_count: i64 = conn.query_row(
1068        "SELECT COUNT(*) FROM memories WHERE workspace = ? AND deleted_at IS NULL AND quality_score < 0.5",
1069        params![workspace_filter],
1070        |row| row.get(0),
1071    ).unwrap_or(0);
1072
1073    if low_quality_count > 0 {
1074        top_issues.push(QualityIssue {
1075            issue_type: "low_quality".to_string(),
1076            count: low_quality_count,
1077            severity: "medium".to_string(),
1078            description: format!("{} memories with low quality scores", low_quality_count),
1079        });
1080    }
1081
1082    let suggestions_summary = vec![
1083        format!("Average quality score: {:.0}%", average_quality * 100.0),
1084        format!("Total memories: {}", total_memories),
1085        if conflicts_count > 0 {
1086            format!(
1087                "Resolve {} conflicts to improve consistency",
1088                conflicts_count
1089            )
1090        } else {
1091            "No conflicts detected".to_string()
1092        },
1093    ];
1094
1095    Ok(QualityReport {
1096        total_memories,
1097        average_quality,
1098        quality_distribution: distribution,
1099        top_issues,
1100        conflicts_count,
1101        duplicates_count,
1102        suggestions_summary,
1103        generated_at: Utc::now(),
1104    })
1105}
1106
1107// ============================================================================
1108// Source Trust (ENG-53)
1109// ============================================================================
1110
1111/// Get or set source trust score
1112pub fn get_source_trust(
1113    conn: &Connection,
1114    source_type: &str,
1115    source_identifier: Option<&str>,
1116) -> Result<SourceTrustScore> {
1117    let identifier = source_identifier.unwrap_or("default");
1118
1119    let result = conn.query_row(
1120        r#"
1121        SELECT source_type, source_identifier, trust_score, verification_count, notes
1122        FROM source_trust_scores
1123        WHERE source_type = ? AND (source_identifier = ? OR source_identifier IS NULL)
1124        ORDER BY source_identifier DESC
1125        LIMIT 1
1126        "#,
1127        params![source_type, identifier],
1128        |row| {
1129            Ok(SourceTrustScore {
1130                source_type: row.get(0)?,
1131                source_identifier: row.get(1)?,
1132                trust_score: row.get(2)?,
1133                verification_count: row.get(3)?,
1134                notes: row.get(4)?,
1135            })
1136        },
1137    );
1138
1139    result.map_err(|_| EngramError::NotFound(0))
1140}
1141
1142/// Update source trust score
1143pub fn update_source_trust(
1144    conn: &Connection,
1145    source_type: &str,
1146    source_identifier: Option<&str>,
1147    trust_score: f32,
1148    notes: Option<&str>,
1149) -> Result<()> {
1150    let now = Utc::now().to_rfc3339();
1151
1152    conn.execute(
1153        r#"
1154        INSERT INTO source_trust_scores (source_type, source_identifier, trust_score, notes, updated_at)
1155        VALUES (?, ?, ?, ?, ?)
1156        ON CONFLICT(source_type, source_identifier)
1157        DO UPDATE SET trust_score = ?, notes = ?, updated_at = ?
1158        "#,
1159        params![
1160            source_type,
1161            source_identifier,
1162            trust_score,
1163            notes,
1164            now,
1165            trust_score,
1166            notes,
1167            now
1168        ],
1169    )?;
1170
1171    Ok(())
1172}
1173
1174// ============================================================================
1175// Tests
1176// ============================================================================
1177
#[cfg(test)]
mod tests {
    use super::*;

    /// Sentences differing by one word score above 0.8; unrelated text scores below 0.3.
    #[test]
    fn test_text_similarity() {
        let base = "The quick brown fox jumps over the lazy dog";
        let near_copy = "The quick brown fox jumps over the lazy cat";
        let unrelated = "Something completely different";

        let close_score = calculate_text_similarity(base, near_copy);
        let far_score = calculate_text_similarity(base, unrelated);

        assert!(
            close_score > 0.8,
            "Similar texts should have high similarity"
        );
        assert!(
            far_score < 0.3,
            "Different texts should have low similarity"
        );
    }

    /// Identical vectors give a similarity of ~1; orthogonal vectors give ~0.
    #[test]
    fn test_cosine_similarity() {
        let x_axis = vec![1.0, 0.0, 0.0];
        let x_axis_again = vec![1.0, 0.0, 0.0];
        let y_axis = vec![0.0, 1.0, 0.0];

        let same_direction = cosine_similarity(&x_axis, &x_axis_again);
        assert!((same_direction - 1.0).abs() < 0.001);

        let orthogonal = cosine_similarity(&x_axis, &y_axis);
        assert!(orthogonal.abs() < 0.001);
    }

    /// Known conflict-type names parse into the matching enum variant.
    #[test]
    fn test_conflict_type_parsing() {
        let cases = [
            ("contradiction", ConflictType::Contradiction),
            ("duplicate", ConflictType::Duplicate),
        ];

        for (text, expected) in cases {
            assert_eq!(text.parse::<ConflictType>().unwrap(), expected);
        }
    }
}
engram/intelligence/context_quality.rs

engram/intelligence/
context_quality.rs