difflore-core 0.1.0

Core library for the difflore CLI — rule store, retrieval, MCP server, hooks, cloud sync. Not intended for direct use; depend on `difflore-cli` instead.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
//! Local-only rule outcome telemetry.
//!
//! Records when a rule is surfaced by recall (`kind = 'recalled'`) so
//! `difflore memory` can show "what rules are actually pulling weight"
//! and `rules show` can include a use-count line.
//!
//! Fix-acceptance telemetry already lives in `fix_outcomes` (richer
//! schema with file path + applied/failed split); this table is just
//! the recall-side ledger. Both are read together by the surfaces.
//!
//! Data never leaves the device. Personal usage signal is not uploaded
//! to cloud — cluster precision is derived cloud-side over consented
//! aggregates only.

use sqlx::SqlitePool;

/// Value written to the `kind` column of `rule_outcomes` for recall events.
///
/// The read queries in this module match the literal `'recalled'` directly in
/// their SQL, so this constant and those literals must stay in sync.
pub const KIND_RECALLED: &str = "recalled";

/// Borrowed description of one recall event, consumed by
/// [`record_recalled_with_context`].
#[derive(Debug, Clone)]
pub struct RuleRecallInput<'a> {
    /// Id of the rule that was recalled; stored in `rule_outcomes.rule_id`.
    pub rule_id: &'a str,
    /// Optional session identifier, stored as given.
    pub session_id: Option<&'a str>,
    /// Optional repository name, stored as given.
    pub repo_full_name: Option<&'a str>,
    /// Optional file path the recall was scoped to, stored as given.
    pub file_path: Option<&'a str>,
    /// Query text that triggered the recall. It is passed through
    /// `crate::mcp_rule_serves::query_hash` and only that result is stored.
    pub query_text: &'a str,
    /// Position of the rule in the recall results (presumably 1-based — confirm
    /// with callers). Values below 1 are clamped to 1 on insert.
    pub rank: i64,
    /// Size of the recall result list (assumed — confirm with callers).
    /// Values below 1 are clamped to 1 on insert.
    pub top_k: i64,
    /// Stored as an integer `1`/`0` in `rule_outcomes.strict_file_match`.
    pub strict_file_match: bool,
}

/// Record a bare recall event (`kind = 'recalled'`) for every id in `rule_ids`.
///
/// Each id yields one `rule_outcomes` row, duplicates included. All inserts
/// run inside a single transaction that is committed only after the last
/// insert succeeded. An empty slice returns immediately without opening a
/// transaction.
///
/// # Errors
///
/// Returns an error if starting the transaction, any insert, or the commit
/// fails.
pub async fn record_recalled(pool: &SqlitePool, rule_ids: &[String]) -> crate::Result<()> {
    if rule_ids.is_empty() {
        return Ok(());
    }
    let mut txn = pool.begin().await?;
    for rule_id in rule_ids {
        sqlx::query!(
            "INSERT INTO rule_outcomes (rule_id, kind) VALUES (?1, ?2)",
            rule_id,
            KIND_RECALLED
        )
        .execute(&mut *txn)
        .await?;
    }
    txn.commit().await?;
    Ok(())
}

/// Record recall events together with low-sensitivity context.
///
/// One `rule_outcomes` row is written per entry in `recalls`, all inside a
/// single transaction. The query text itself is never stored — only the value
/// returned by `crate::mcp_rule_serves::query_hash` — alongside the session,
/// repository and file scope. `rank` and `top_k` are clamped to at least 1,
/// and `strict_file_match` is persisted as `1`/`0`.
///
/// The stored `rank` is what lets the buyer-value gate tell a rule that was
/// actually shown to an agent (`rank <= 3`) from one that merely existed
/// somewhere in the corpus.
///
/// An empty slice returns immediately without opening a transaction.
///
/// # Errors
///
/// Returns an error if starting the transaction, any insert, or the commit
/// fails.
pub async fn record_recalled_with_context(
    pool: &SqlitePool,
    recalls: &[RuleRecallInput<'_>],
) -> crate::Result<()> {
    if recalls.is_empty() {
        return Ok(());
    }
    let mut txn = pool.begin().await?;
    for entry in recalls {
        // Clamp so a zero or negative value from a caller is never persisted.
        let stored_rank = entry.rank.max(1);
        let stored_top_k = entry.top_k.max(1);
        let strict_flag = i64::from(entry.strict_file_match);
        // Only the hash of the query text reaches the database.
        let hashed_query = crate::mcp_rule_serves::query_hash(entry.query_text);
        sqlx::query!(
            "INSERT INTO rule_outcomes
             (rule_id, kind, session_id, repo_full_name, file_path, query_hash,
              rank, top_k, strict_file_match)
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
            entry.rule_id,
            KIND_RECALLED,
            entry.session_id,
            entry.repo_full_name,
            entry.file_path,
            hashed_query,
            stored_rank,
            stored_top_k,
            strict_flag,
        )
        .execute(&mut *txn)
        .await?;
    }
    txn.commit().await?;
    Ok(())
}

/// One row of the [`top_recalled`] ranking.
#[derive(Debug, Clone, sqlx::FromRow)]
pub struct RecallCount {
    /// Id of the recalled rule.
    pub rule_id: String,
    /// Number of recall events for `rule_id` inside the queried window.
    pub count: i64,
}

/// Aggregate recall totals returned by [`summary`].
#[derive(Debug, Clone, sqlx::FromRow)]
pub struct RecallSummary {
    /// Total number of recall rows in the window (`COUNT(*)`).
    pub recall_events: i64,
    /// Number of distinct rule ids among those rows (`COUNT(DISTINCT rule_id)`).
    pub recalled_rules: i64,
}

/// The accepted-and-applied `fix_outcomes` row returned by
/// [`latest_accepted_fix_for`].
#[derive(Debug, Clone, PartialEq, Eq, sqlx::FromRow)]
pub struct AcceptedFixEvidence {
    /// `fix_outcomes.file_path` of the row; `None` when the column is NULL.
    pub file_path: Option<String>,
    /// `fix_outcomes.created_at` of the row, as the stored text value.
    pub created_at: String,
}

/// The top-3 recall row returned by [`latest_top3_recall_for`].
#[derive(Debug, Clone, PartialEq, Eq, sqlx::FromRow)]
pub struct TopRecallEvidence {
    /// Id of the recalled rule.
    pub rule_id: String,
    /// Repository recorded with the recall, if any.
    pub repo_full_name: Option<String>,
    /// File path recorded with the recall, if any.
    pub file_path: Option<String>,
    /// Stored rank of the rule; the producing query only selects rows whose
    /// rank is between 1 and 3.
    pub rank: i64,
    /// Stored `top_k`; `0` when the column is NULL.
    pub top_k: i64,
    /// `true` when the stored `strict_file_match` column is non-zero.
    pub strict_file_match: bool,
    /// `rule_outcomes.created_at` of the row, as the stored text value.
    pub recalled_at: String,
}

/// Aggregate recall activity over the last `days` days.
///
/// Counts every `rule_outcomes` row with `kind = 'recalled'` in the window,
/// plus the number of distinct rule ids among them. `days` is clamped to at
/// least 1. The query does not join `skills`, so recalls of rules that no
/// longer exist are still counted.
///
/// # Errors
///
/// Returns an error if the query fails.
pub async fn summary(pool: &SqlitePool, days: i64) -> crate::Result<RecallSummary> {
    // SQLite date modifier, e.g. "-30 days".
    let lookback = format!("-{} days", days.max(1));
    let totals = sqlx::query_as!(
        RecallSummary,
        r#"SELECT
             COUNT(*) AS "recall_events!: i64",
             COUNT(DISTINCT rule_id) AS "recalled_rules!: i64"
         FROM rule_outcomes
         WHERE kind = 'recalled'
           AND datetime(created_at) >= datetime('now', ?1)"#,
        lookback,
    )
    .fetch_one(pool)
    .await?;
    Ok(totals)
}

/// Rank rules by how often they were recalled in the last `days` days.
///
/// Returns at most `limit` rows, most-recalled first; ties are ordered by
/// ascending rule id. Both `days` and `limit` are clamped to at least 1.
///
/// # Errors
///
/// Returns an error if the query fails.
pub async fn top_recalled(
    pool: &SqlitePool,
    days: i64,
    limit: i64,
) -> crate::Result<Vec<RecallCount>> {
    // SQLite date modifier, e.g. "-7 days".
    let lookback = format!("-{} days", days.max(1));
    let row_cap = limit.max(1);
    // The INNER JOIN on `skills` drops recall rows whose rule has since been
    // deleted. Otherwise `difflore memory` would list such zombie rules by
    // their bare rule_id, having no skill row to take a name from.
    let ranking = sqlx::query_as!(
        RecallCount,
        r#"SELECT o.rule_id AS "rule_id!: String", COUNT(*) AS "count!: i64"
         FROM rule_outcomes o
         INNER JOIN skills s ON s.id = o.rule_id
         WHERE o.kind = 'recalled'
           AND datetime(o.created_at) >= datetime('now', ?1)
         GROUP BY o.rule_id
         ORDER BY COUNT(*) DESC, o.rule_id ASC
         LIMIT ?2"#,
        lookback,
        row_cap
    )
    .fetch_all(pool)
    .await?;
    Ok(ranking)
}

/// Number of recall events recorded for `rule_id` in the last `days` days.
///
/// `days` is clamped to at least 1. Returns `0` when the rule has no recall
/// rows in the window.
///
/// # Errors
///
/// Returns an error if the query fails.
pub async fn recall_count_for(pool: &SqlitePool, rule_id: &str, days: i64) -> crate::Result<i64> {
    // SQLite date modifier, e.g. "-30 days".
    let lookback = format!("-{} days", days.max(1));
    let recalls: i64 = sqlx::query_scalar!(
        r#"SELECT COUNT(*) AS "n!: i64" FROM rule_outcomes
         WHERE kind = 'recalled' AND rule_id = ?1
           AND datetime(created_at) >= datetime('now', ?2)"#,
        rule_id,
        lookback
    )
    .fetch_one(pool)
    .await?;
    Ok(recalls)
}

/// Most recent recall of `rule_id` that ranked in the top three.
///
/// Only `rule_outcomes` rows with `kind = 'recalled'`, a stored `rank` between
/// 1 and 3, and a `created_at` inside the last `days` days (clamped to at
/// least 1) are considered. Rows written without a rank never match. The
/// newest row wins, with ties on timestamp broken by the higher `id`.
///
/// Returns `None` when no row matches.
///
/// # Errors
///
/// Returns an error if the query fails.
pub async fn latest_top3_recall_for(
    pool: &SqlitePool,
    rule_id: &str,
    days: i64,
) -> crate::Result<Option<TopRecallEvidence>> {
    // SQLite date modifier, e.g. "-30 days".
    let lookback = format!("-{} days", days.max(1));
    let evidence = sqlx::query_as!(
        TopRecallEvidence,
        r#"SELECT rule_id AS "rule_id!: String",
                repo_full_name,
                file_path,
                COALESCE(rank, 999) AS "rank!: i64",
                COALESCE(top_k, 0) AS "top_k!: i64",
                strict_file_match != 0 AS "strict_file_match!: bool",
                created_at AS "recalled_at!: String"
         FROM rule_outcomes
         WHERE kind = 'recalled'
           AND rule_id = ?1
           AND rank BETWEEN 1 AND 3
           AND datetime(created_at) >= datetime('now', ?2)
         ORDER BY datetime(created_at) DESC, id DESC
         LIMIT 1"#,
        rule_id,
        lookback,
    )
    .fetch_optional(pool)
    .await?;
    Ok(evidence)
}

/// Number of fixes for `rule_id` that were both accepted and applied
/// successfully in the last `days` days.
///
/// Counts `fix_outcomes` rows with `accepted = 1 AND applied_ok = 1`; `days`
/// is clamped to at least 1. The query lives in this module, rather than next
/// to the `fix_outcomes` writers, so the memory/show surfaces can import every
/// "rule outcome" read from one place.
///
/// # Errors
///
/// Returns an error if the query fails.
pub async fn fix_accepted_count_for(
    pool: &SqlitePool,
    rule_id: &str,
    days: i64,
) -> crate::Result<i64> {
    // SQLite date modifier, e.g. "-30 days".
    let lookback = format!("-{} days", days.max(1));
    let accepted: i64 = sqlx::query_scalar!(
        r#"SELECT COUNT(*) AS "n!: i64" FROM fix_outcomes
         WHERE rule_id = ?1 AND accepted = 1 AND applied_ok = 1
           AND datetime(created_at) >= datetime('now', ?2)"#,
        rule_id,
        lookback
    )
    .fetch_one(pool)
    .await?;
    Ok(accepted)
}

/// Most recent fix for `rule_id` that was both accepted and applied
/// successfully within the last `days` days.
///
/// Considers `fix_outcomes` rows with `accepted = 1 AND applied_ok = 1`;
/// `days` is clamped to at least 1. The newest row wins, with ties on
/// timestamp broken by the higher `id`.
///
/// Returns `None` when no row matches.
///
/// # Errors
///
/// Returns an error if the query fails.
pub async fn latest_accepted_fix_for(
    pool: &SqlitePool,
    rule_id: &str,
    days: i64,
) -> crate::Result<Option<AcceptedFixEvidence>> {
    // SQLite date modifier, e.g. "-30 days".
    let lookback = format!("-{} days", days.max(1));
    let evidence = sqlx::query_as!(
        AcceptedFixEvidence,
        r#"SELECT file_path, created_at AS "created_at!: String"
         FROM fix_outcomes
         WHERE rule_id = ?1
           AND accepted = 1
           AND applied_ok = 1
           AND datetime(created_at) >= datetime('now', ?2)
         ORDER BY datetime(created_at) DESC, id DESC
         LIMIT 1"#,
        rule_id,
        lookback,
    )
    .fetch_optional(pool)
    .await?;
    Ok(evidence)
}

// Integration-style tests: each one runs the real migrations against a fresh
// in-memory SQLite database and exercises the public functions above.
#[cfg(test)]
// NOTE(review): the stated reason below reads like it belongs to an
// `expect`/`unwrap` lint rather than `str_to_string`, and nothing in this
// module calls `.to_string()` on a `&str` — confirm the allow is still needed.
#[allow(clippy::str_to_string)] // reason: test code — failure should panic with context.
mod tests {
    use super::*;
    use sqlx::sqlite::SqlitePoolOptions;

    /// Open an in-memory SQLite pool and apply the crate's migrations.
    ///
    /// The pool is capped at one connection — presumably so every query in a
    /// test sees the same in-memory database; confirm before raising the cap.
    async fn setup() -> SqlitePool {
        let pool = SqlitePoolOptions::new()
            .max_connections(1)
            .connect("sqlite::memory:")
            .await
            .expect("open pool");
        sqlx::migrate!("./migrations")
            .run(&pool)
            .await
            .expect("apply migrations");
        pool
    }

    /// Insert a `skills` row with the given id and name; the remaining columns
    /// get fixed placeholder values.
    async fn insert_skill(pool: &SqlitePool, id: &str, name: &str) {
        sqlx::query!(
            "INSERT INTO skills (id, name, source, directory, version)
             VALUES (?1, ?2, 'manual', '/tmp', '1.0.0')",
            id,
            name,
        )
        .execute(pool)
        .await
        .expect("insert skill");
    }

    /// Locks in the fix for the `difflore memory` zombie-rule bug:
    /// recall events whose owning rule has been deleted from `skills`
    /// must not surface in the Most-recalled list.
    #[tokio::test]
    async fn top_recalled_excludes_deleted_rules() {
        let pool = setup().await;
        insert_skill(&pool, "r1", "Real rule").await;
        insert_skill(&pool, "r2", "Soon-deleted rule").await;

        // r2 is recalled more often than r1, so it would rank first if it
        // were not filtered out.
        record_recalled(&pool, &["r1".to_owned()])
            .await
            .expect("record r1");
        record_recalled(&pool, &["r2".to_owned(), "r2".to_owned()])
            .await
            .expect("record r2");

        // Drop the rule but keep its outcome rows (idempotent design).
        sqlx::query!("DELETE FROM skills WHERE id = 'r2'")
            .execute(&pool)
            .await
            .expect("delete r2");

        let rows = top_recalled(&pool, 7, 10).await.expect("top_recalled");
        let ids: Vec<&str> = rows.iter().map(|r| r.rule_id.as_str()).collect();
        assert!(ids.contains(&"r1"), "real rule should appear: {ids:?}");
        assert!(
            !ids.contains(&"r2"),
            "deleted rule must not appear: {ids:?}"
        );
    }

    /// `summary` counts every recall row and the distinct rule ids among them.
    /// No `skills` rows are inserted here, so the counts do not depend on the
    /// rules existing.
    #[tokio::test]
    async fn summary_counts_local_recall_events_and_distinct_rules() {
        let pool = setup().await;
        record_recalled(&pool, &["r1".to_owned(), "r2".to_owned()])
            .await
            .expect("record first recall");
        record_recalled(&pool, &["r2".to_owned()])
            .await
            .expect("record second recall");

        // Three rows in total (r1, r2, r2) across two distinct rules.
        let row = summary(&pool, 30).await.expect("summary");
        assert_eq!(row.recall_events, 3);
        assert_eq!(row.recalled_rules, 2);
    }

    /// Of two recalls for the same rule — one at rank 4, one at rank 2 — only
    /// the rank-2 row qualifies as top-3 evidence, and its stored context is
    /// returned unchanged.
    #[tokio::test]
    async fn latest_top3_recall_for_requires_ranked_recall_context() {
        let pool = setup().await;
        record_recalled_with_context(
            &pool,
            &[
                // Outside the top three: must be ignored.
                RuleRecallInput {
                    rule_id: "r1",
                    session_id: Some("session-1"),
                    repo_full_name: Some("acme/widgets"),
                    file_path: Some("src/auth.rs"),
                    query_text: "src/auth.rs validate auth token",
                    rank: 4,
                    top_k: 5,
                    strict_file_match: true,
                },
                // Inside the top three: the expected result.
                RuleRecallInput {
                    rule_id: "r1",
                    session_id: Some("session-1"),
                    repo_full_name: Some("acme/widgets"),
                    file_path: Some("src/auth.rs"),
                    query_text: "src/auth.rs validate auth token",
                    rank: 2,
                    top_k: 5,
                    strict_file_match: true,
                },
            ],
        )
        .await
        .expect("record ranked recall");

        let recall = latest_top3_recall_for(&pool, "r1", 30)
            .await
            .expect("latest recall")
            .expect("top3 recall");

        assert_eq!(recall.rank, 2);
        assert_eq!(recall.top_k, 5);
        assert_eq!(recall.repo_full_name.as_deref(), Some("acme/widgets"));
        assert_eq!(recall.file_path.as_deref(), Some("src/auth.rs"));
        assert!(recall.strict_file_match);
    }

    /// Among an older accepted fix, a current accepted fix, and a current
    /// rejected fix, the current accepted one must be returned.
    #[tokio::test]
    async fn latest_accepted_fix_for_returns_newest_applied_fix() {
        let pool = setup().await;
        insert_skill(&pool, "r1", "Real rule").await;
        // NOTE(review): 'f-old' uses a fixed timestamp while the other rows use
        // datetime('now'). If the clock is earlier than 2026-05-01, 'f-old'
        // sorts as the newest row and the assertion below fails; a relative
        // timestamp would make the test clock-independent.
        sqlx::query!(
            "INSERT INTO fix_outcomes
             (id, rule_id, rule_name, file_path, accepted, applied_ok, created_at)
             VALUES
             ('f-old', 'r1', 'Real rule', 'src/old.rs', 1, 1, '2026-05-01 00:00:00'),
             ('f-new', 'r1', 'Real rule', 'src/new.rs', 1, 1, datetime('now')),
             ('f-rejected', 'r1', 'Real rule', 'src/rejected.rs', 0, 0, datetime('now'))",
        )
        .execute(&pool)
        .await
        .expect("insert fix outcomes");

        let latest = latest_accepted_fix_for(&pool, "r1", 30)
            .await
            .expect("latest accepted fix")
            .expect("some accepted fix");
        assert_eq!(latest.file_path.as_deref(), Some("src/new.rs"));
    }
}