difflore-core 0.1.0

Core library for the difflore CLI — rule store, retrieval, MCP server, hooks, cloud sync. Not intended for direct use; depend on `difflore-cli` instead.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
//! Pure scoring helpers for hybrid rule retrieval.
//!
//! Split out of `rules.rs` so the main hybrid-retrieval orchestration stays
//! focused on fusion and result-set shaping. These functions are all pure
//! (no I/O): the intent-alignment directive matchers and the category-keyed
//! confidence-decay model. The orchestration in `rules.rs` calls them via
//! `super::scoring::…`; `effective_confidence`, `infer_rule_kind`, and
//! `RuleKind` are re-exported from `super` (the retrieval module) for external
//! callers.

use super::{
    MIN_DISTINCTIVE_SHARED_TERMS, MIN_INTENT_DIRECTIVE_OVERLAP, MIN_INTENT_DIRECTIVE_OVERLAP_RATIO,
    rule_title,
};

/// Build the "directive" string that intent-alignment matching runs against:
/// the rule's title followed by a short leading slice of its body.
///
/// Indexed `content` is laid out as a metadata header (`Rule ID: …`,
/// `Rule Name: <title>`, `Type: …`, `Tags: …`), a blank line, then the rule
/// body. The title is the human-written summary of what the rule asks for, so
/// it carries the strongest directive signal. A bounded prefix of the body is
/// appended so that a rule with a label-like title ("Error handling") whose
/// real imperative lives in the body still exposes its action/subject terms.
/// The prefix is capped so a long body (examples, rationale) cannot flood the
/// match with incidental vocabulary.
///
/// Details:
/// * The body is whatever follows the FIRST `"\n\n"`; when there is no blank
///   line the whole `content` is used as the body (header lines included).
/// * The body is trimmed before the prefix is taken, and the cap counts
///   `char`s, not bytes, so multi-byte text is never split mid-character.
/// * The title comes from `rule_title(content, "")` in the parent module; the
///   empty string is presumably its fallback when no title line exists —
///   confirm against that helper.
///
/// Returns `"<title> <body prefix>"` (single space separator, original case).
pub(super) fn rule_directive_text(content: &str) -> String {
    // Leading-body budget: roughly one opening imperative sentence.
    const BODY_DIRECTIVE_CHARS: usize = 160;

    // Everything after the header/body separator, or the full chunk when the
    // content carries no header at all.
    let body = match content.split_once("\n\n") {
        Some((_header, rest)) => rest,
        None => content,
    };
    let body_head: String = body.trim().chars().take(BODY_DIRECTIVE_CHARS).collect();

    let title = rule_title(content, "");
    format!("{title} {body_head}")
}

/// Report whether `stem` is a generic topical/code anchor — a word that, when
/// SHARED between a query and a rule directive, does NOT by itself establish
/// a subject/action concern match because nearly every rule in a file area
/// mentions it.
///
/// Background: the extra false positives seen in review came from rules that
/// shared one of these anchors (`panic`, `test`, `error`) plus incidental
/// filler, not a genuine subject overlap. Treating them as non-distinctive
/// forces a concern match to rest on a SPECIFIC shared token (`false`,
/// `invalid`, `hijack`, `memchr`). The set is deliberately SMALL and
/// conservative: only the highest-frequency, lowest-specificity
/// programming/review words belong here, because a longer list risks
/// discounting a genuinely distinctive subject.
///
/// `stem` is expected to be the output of [`light_stem`]. That stemmer strips
/// `ing`/`es`/`s` without restoring a dropped `e` or undoing consonant
/// doubling, so inflected forms of some anchors arrive here truncated
/// (`values` → `valu`, `handling` → `handl`, `panicking` → `panick`). Those
/// truncated stems are listed explicitly in `TRUNCATED_ANCHOR_STEMS`; without
/// them an inflected generic anchor would be counted as distinctive and slip
/// through the precision gate.
///
/// `stat` (the truncated stem of `states`) is intentionally NOT listed: it
/// collides with the real words `stat`/`stats`, and discounting a genuine
/// subject is the costlier mistake.
///
/// Matching is exact and case-sensitive; callers pass lowercased stems.
pub(super) fn is_generic_anchor(stem: &str) -> bool {
    // Dictionary forms — what `light_stem` yields for the bare word and for
    // inflections that strip cleanly (`errors`, `testing`, `handlers`).
    const GENERIC_ANCHORS: &[&str] = &[
        "panic", "error", "test", "value", "code", "data", "type", "result", "check", "handle",
        "handler", "return", "input", "output", "field", "case", "call", "message", "method",
        "function", "file", "line", "block", "thread", "task", "async", "await", "default",
        "option", "config", "request", "response", "buffer", "queue", "size", "count", "index",
        "state", "event", "lock", "guard", "time", "timer", "runtime", "feature",
    ];
    // Truncated forms `light_stem` yields for plural/gerund variants of the
    // anchors above (e-final words losing their `e`, `panicking` keeping its
    // inserted `k`). All are non-words, so they cannot shadow a real subject.
    const TRUNCATED_ANCHOR_STEMS: &[&str] = &[
        "valu", "handl", "messag", "queu", "featur", "respons", "runtim", "panick",
    ];
    GENERIC_ANCHORS.contains(&stem) || TRUNCATED_ANCHOR_STEMS.contains(&stem)
}

/// Test whether a single query `term` occurs in the `directive` string,
/// tolerating simple morphology.
///
/// The check is a plain substring search, tried twice: first with the term as
/// given, then with its [`light_stem`]. Substring matching lets `parse` hit
/// `parsed` and `batch` hit `batching`; the stemmed retry covers the opposite
/// direction, where the query carries the inflection (`batching`, `retries`)
/// and the directive has the base form (`batch`, `retry`).
///
/// No case folding happens here — callers are expected to pass both arguments
/// already lowercased. Because `str::contains("")` is always true, an empty
/// `term` is reported as present.
pub(super) fn term_present_in_directive(term: &str, directive: &str) -> bool {
    // Fast path: the surface form is there, no need to allocate a stem.
    if directive.contains(term) {
        return true;
    }
    let stem = light_stem(term);
    directive.contains(stem.as_str())
}

/// Decide whether one candidate rule's DIRECTIVE lines up with the query's
/// intent — a CONCERN (subject/action) match rather than topical adjacency.
///
/// # Algorithm
///
/// 1. Build the presence string with [`rule_directive_text`] (title + short
///    body head) and ASCII-lowercase it.
/// 2. Collect the query terms found in it via [`term_present_in_directive`],
///    deduplicated by [`light_stem`] so a query repeating one concept cannot
///    inflate the overlap.
/// 3. Count how many of those shared stems are DISTINCTIVE, i.e. not an
///    [`is_generic_anchor`].
/// 4. Reject unless at least [`MIN_DISTINCTIVE_SHARED_TERMS`] shared stems are
///    distinctive.
/// 5. Otherwise accept when EITHER
///    * **absolute path** — the shared set holds at least
///      [`MIN_INTENT_DIRECTIVE_OVERLAP`] stems (the verb/object pair), or
///    * **ratio path** — shared stems / query terms is at least
///      [`MIN_INTENT_DIRECTIVE_OVERLAP_RATIO`], which keeps short, sharp
///      queries (and the "one rule per half of a two-word intent" fan-out)
///      from being over-pruned.
///
/// # Why distinctiveness
///
/// An earlier count-or-ratio-only test let through topically adjacent,
/// wrong-subject rules: two generic anchors (`panic` + `input`) satisfied the
/// count, and a lone generic anchor satisfied the ratio for a two-term query.
/// Requiring a specific shared token removes those all-anchor matches. It was
/// chosen over a rule-side coverage ratio because coverage is too sensitive
/// to phrasing (bare-label titles, directives that live in the body), whereas
/// distinctiveness looks only at the shared set. Self-recall — the query
/// being the rule's own intent text — shares the rule's specific subject
/// tokens and so still passes.
///
/// # Notes
///
/// * An empty `query_terms` slice never aligns.
/// * Only the directive is lowercased here; `query_terms` are matched as
///   given, so callers presumably pass them lowercased already.
/// * NOTE(review): the ratio's numerator is stem-deduplicated but its
///   denominator is the raw `query_terms.len()`, so a query with repeated
///   terms has a lower ratio than its distinct concepts alone would give —
///   confirm this is intended.
///
/// Pure: allocates the lowercased directive and the shared stems, no I/O.
pub(super) fn directive_intent_aligned(content: &str, query_terms: &[String]) -> bool {
    if query_terms.is_empty() {
        return false;
    }

    // Title + short body head, lowercased once for every term probe below.
    let haystack = rule_directive_text(content).to_ascii_lowercase();

    // Stems of the query terms found in the directive (first occurrence
    // wins), plus a running tally of the non-generic ones among them.
    let mut seen_stems: Vec<String> = Vec::new();
    let mut specific_hits = 0usize;
    let present_terms = query_terms
        .iter()
        .filter(|term| term_present_in_directive(term.as_str(), &haystack));
    for term in present_terms {
        let stem = light_stem(term);
        if seen_stems.contains(&stem) {
            continue;
        }
        if !is_generic_anchor(&stem) {
            specific_hits += 1;
        }
        seen_stems.push(stem);
    }

    // Precision gate: an overlap built purely from generic anchors is the
    // topical adjacency this function exists to filter out, whatever its
    // count or ratio.
    if specific_hits < MIN_DISTINCTIVE_SHARED_TERMS {
        return false;
    }

    let shared = seen_stems.len();
    // Absolute path first (enough shared terms); otherwise fall back to the
    // share of the query that the overlap covers.
    shared >= MIN_INTENT_DIRECTIVE_OVERLAP
        || shared as f64 / query_terms.len() as f64 >= MIN_INTENT_DIRECTIVE_OVERLAP_RATIO
}

/// Light, allocation-cheap stemmer for intent-alignment term matching.
///
/// Strips the common English inflectional suffixes (`ies`→`y`, `ing`, `es`,
/// `s`) so a query term and a directive token that differ only by
/// plural/gerund form fold to the same stem ("batching"→"batch",
/// "retries"→"retry"). Deliberately NOT a full Porter stemmer: it only needs
/// to reconcile the surface morphology the alignment gate trips on, and an
/// over-aggressive stemmer would re-introduce the topical-adjacency overlap
/// the gate exists to suppress. Guarded by a minimum residual stem length so
/// short tokens (≤4 chars) are never truncated into noise.
pub(super) fn light_stem(term: &str) -> String {
    const MIN_STEM_LEN: usize = 4;
    if let Some(base) = term.strip_suffix("ies")
        && base.len() >= MIN_STEM_LEN - 1
    {
        // retries -> retri -> retry
        return format!("{base}y");
    }
    for suffix in ["ing", "es", "s"] {
        if let Some(base) = term.strip_suffix(suffix)
            && base.len() >= MIN_STEM_LEN
        {
            return base.to_owned();
        }
    }
    term.to_owned()
}

/// Coarse rule-kind taxonomy that selects the half-life used by the
/// time-decay multiplier ([`effective_confidence`]).
///
/// `IndexedRuleChunk` does not yet carry an explicit `kind` column — the
/// rule corpus only stores `content` + denormalised metadata. So instead
/// of inventing jcode's literal `MemoryEntry::Kind` enum on the wrong
/// data model, a coarse bucket is inferred from the rule's content by
/// [`infer_rule_kind`]. The buckets mirror the practical kinds observed in
/// production — corrections, slogans, style/lint rules, and generic
/// preferences/conventions — as named by an audit comment that predates this
/// module's split out of `rules.rs` (NOTE(review): that comment is not in
/// this file; confirm it still exists before citing it).
///
/// The inferred kind is enough to apply category-aware decay without widening
/// the index schema. Per-variant half-lives are defined in `half_life_days`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleKind {
    /// "Don't do X", "always Y", "fix: ...", "regression". These are the
    /// rare, hard-won rules — long memory (365-day half-life).
    Correction,
    /// User taste / project conventions ("we use Drizzle", "naming = X").
    /// Medium memory (120-day half-life).
    Convention,
    /// Style / lint surface ("prefer let over const", formatting). Ages
    /// fastest (30-day half-life) because codebases reformat constantly.
    Style,
    /// Slogans / vague guidance ("trust CI", "review carefully") — same
    /// short 30-day half-life as style, since such guidance tends to misfire.
    Slogan,
    /// Anything we couldn't classify (90-day half-life).
    Other,
}

/// Classify a rule into a [`RuleKind`] bucket from its text alone.
///
/// The heuristic is a prioritised list of literal substring probes against
/// the ASCII-lowercased `content` — no regex and no tokenisation, so it stays
/// cheap enough to run per chunk per query. Buckets are tried in a fixed
/// order and the first one with any matching hint wins:
///
/// 1. `Correction` — checked first on purpose: it is the highest-value
///    bucket, and over-classifying into long memory is preferred to evicting
///    a hard-won correction early.
/// 2. `Style`
/// 3. `Convention`
/// 4. `Slogan`
///
/// Content matching no hint is `RuleKind::Other`.
///
/// Because the probes are raw substrings, a hint also fires inside a longer
/// word (`format` within `information`, `lint` within `splinter`), and hints
/// written with a trailing space (`"never "`, `"we use "`) only fire when the
/// word is followed by a space. The whole `content` is scanned, including any
/// metadata header lines it carries.
pub fn infer_rule_kind(content: &str) -> RuleKind {
    // (bucket, hints) in priority order — earlier rows shadow later ones.
    const BUCKETS: [(RuleKind, &[&str]); 4] = [
        (
            RuleKind::Correction,
            &[
                "don't ",
                "do not ",
                "never ",
                "must not ",
                "regression",
                "fix:",
                "bug:",
                "broke ",
                "incorrect",
                "wrong",
            ],
        ),
        (
            RuleKind::Style,
            &[
                "format",
                "indent",
                "spacing",
                "naming convention",
                "lint",
                "prettier",
                "rustfmt",
                "biome",
                "eslint",
            ],
        ),
        (
            RuleKind::Convention,
            &[
                "we use ",
                "prefer ",
                "always use ",
                "convention",
                "project uses",
            ],
        ),
        (
            RuleKind::Slogan,
            &[
                "trust ",
                "review carefully",
                "be careful",
                "best practice",
                "good practice",
            ],
        ),
    ];

    let haystack = content.to_ascii_lowercase();
    for (kind, hints) in BUCKETS {
        if hints.iter().any(|hint| haystack.contains(*hint)) {
            return kind;
        }
    }
    RuleKind::Other
}

/// Confidence half-life, in days, for each [`RuleKind`].
///
/// The figures are borrowed from jcode's `effective_confidence` table and
/// re-mapped onto difflore's own rule taxonomy:
///
/// | jcode kind   | difflore kind | half-life |
/// |--------------|---------------|-----------|
/// | Correction   | Correction    | 365       |
/// | Preference   | Convention    | 120       |
/// | (n/a)        | Style         |  30       |
/// | Fact         | Slogan        |  30       |
/// | Entity (def) | Other         |  90       |
///
/// The match is exhaustive with no wildcard arm, so adding a `RuleKind`
/// variant fails compilation here until it is given a half-life.
const fn half_life_days(kind: RuleKind) -> f32 {
    // Arms ordered from fastest-decaying to longest-lived.
    match kind {
        RuleKind::Style | RuleKind::Slogan => 30.0,
        RuleKind::Other => 90.0,
        RuleKind::Convention => 120.0,
        RuleKind::Correction => 365.0,
    }
}

/// Decay a raw confidence value exponentially with the rule's age, using the
/// half-life of its [`RuleKind`].
///
/// `effective = raw * 0.5 ^ (age_days / half_life)`
///
/// * `raw_confidence` is clamped into `[0.0, 1.0]` before decaying.
/// * `age_days` below zero is treated as zero, so a rule never gains
///   confidence from a negative age.
/// * `kind` picks the half-life via `half_life_days`.
///
/// At `age_days = 0` the clamped raw value comes back unchanged; after one
/// half-life it is halved, after two quartered, and so on. The result feeds
/// the existing `0.9 + 0.1 * confidence` tie-breaker, so old rules gradually
/// lose their tie-breaker bump.
pub fn effective_confidence(raw_confidence: f32, kind: &RuleKind, age_days: f32) -> f32 {
    let half_lives_elapsed = age_days.max(0.0) / half_life_days(*kind);
    let decay = 0.5_f32.powf(half_lives_elapsed);
    raw_confidence.clamp(0.0, 1.0) * decay
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Every `RuleKind` variant, for properties that must hold across the
    /// whole taxonomy.
    const ALL_KINDS: [RuleKind; 5] = [
        RuleKind::Correction,
        RuleKind::Convention,
        RuleKind::Style,
        RuleKind::Slogan,
        RuleKind::Other,
    ];

    /// The `0.9 + 0.1 * confidence` tie-breaker weight that
    /// `effective_confidence` is documented to feed.
    fn tie_breaker_weight(confidence: f32) -> f64 {
        0.1f64.mul_add(confidence.clamp(0.0, 1.0) as f64, 0.9)
    }

    #[test]
    fn effective_confidence_fresh_correction_keeps_raw_value() {
        // One day into a 365-day half-life: decay is negligible, so a
        // conf=0.6 correction should still read ~0.6.
        let eff = effective_confidence(0.6, &RuleKind::Correction, 1.0);
        let drift = (eff - 0.6).abs();
        assert!(
            drift < 0.005,
            "fresh correction should keep raw conf, got {eff}"
        );
    }

    #[test]
    fn effective_confidence_two_year_old_style_decays_to_near_zero() {
        // 730 days / 30-day half-life ≈ 24.3 halvings → effectively zero.
        let eff = effective_confidence(0.9, &RuleKind::Style, 730.0);
        assert!(
            eff < 1e-6,
            "two-year-old style rule should decay to ~0, got {eff}"
        );
    }

    #[test]
    fn strengthened_correction_outranks_fresh_slogan() {
        // Correction: conf 0.95, 30 days old, half-life 365 → factor ~0.944,
        // effective ~0.897.
        // Slogan: conf 0.5, 1 day old, half-life 30 → factor ~0.977,
        // effective ~0.488.
        let slogan = effective_confidence(0.5, &RuleKind::Slogan, 1.0);
        let correction = effective_confidence(0.95, &RuleKind::Correction, 30.0);
        assert!(
            correction > slogan,
            "strengthened correction ({correction}) should outrank fresh slogan ({slogan})"
        );
    }

    #[test]
    fn effective_confidence_at_age_zero_is_identity() {
        // The wiring depends on age=0.0 being a no-op until real ages are
        // plumbed through, whatever the kind.
        for k in ALL_KINDS {
            let eff = effective_confidence(0.73, &k, 0.0);
            assert!(
                (eff - 0.73).abs() < 1e-6,
                "age=0 must be identity for {k:?}, got {eff}"
            );
        }
    }

    #[test]
    fn age_days_map_decays_old_style_below_fresh_correction_via_options() {
        // Direct-formula check that age plumbing changes the ranking. With no
        // ages both rules would get the same `0.9 + 0.1 * raw_conf` style of
        // weight ordering by raw confidence; with ages, a two-year-old style
        // rule (raw 0.95) must lose to a day-old correction (raw 0.6) — the
        // failure mode the borrowed half-lives are meant to fix.
        let correction_weight =
            tie_breaker_weight(effective_confidence(0.6, &RuleKind::Correction, 1.0));
        let style_weight = tie_breaker_weight(effective_confidence(0.95, &RuleKind::Style, 730.0));
        assert!(
            correction_weight > style_weight,
            "fresh correction weight ({correction_weight}) must beat old-style weight ({style_weight}) once age is plumbed"
        );
    }

    #[test]
    fn age_days_map_lookup_falls_back_to_zero_for_unknown_skill() {
        // A chunk whose skill_id is missing from the age map (e.g. its skill
        // was deleted) must be scored with age_days=0 — no decay — rather
        // than panicking or being treated as infinitely old.
        let mut map: HashMap<String, f32> = HashMap::new();
        map.insert("known-skill".to_owned(), 30.0_f32);
        let age_of = |skill_id: &str| map.get(skill_id).copied().unwrap_or(0.0);

        assert!((age_of("known-skill") - 30.0).abs() < 1e-6);
        assert!(
            age_of("ghost-skill").abs() < 1e-6,
            "unknown skill must fall back to 0"
        );
    }

    #[test]
    fn is_generic_anchor_flags_common_topic_words_only() {
        // High-frequency topic anchors must be in the discount set; specific
        // subject/action tokens — what a real concern match rests on — must
        // not be.
        const EXPECT_GENERIC: [&str; 6] = ["panic", "error", "test", "input", "handler", "runtime"];
        const EXPECT_DISTINCTIVE: [&str; 6] = [
            "hijack", "memchr", "invalid", "false", "session", "validate",
        ];

        for generic in EXPECT_GENERIC {
            assert!(
                is_generic_anchor(generic),
                "`{generic}` should be a generic anchor"
            );
        }
        for distinctive in EXPECT_DISTINCTIVE {
            assert!(
                !is_generic_anchor(distinctive),
                "`{distinctive}` is a distinctive subject token, not a generic anchor"
            );
        }
    }

    #[test]
    fn rule_directive_text_distils_title_and_body_head() {
        let content = "Rule ID: r1\nRule Name: Return false on invalid input\nType: x\nTags: \n\nReturn false rather than panicking when the caller passes bad input.";
        let directive = rule_directive_text(content);

        // Title and the opening of the body are both present…
        assert!(directive.contains("Return false on invalid input"));
        assert!(directive.contains("panicking"));
        // …while the metadata header (Rule ID / Type / Tags lines) is not.
        assert!(!directive.contains("Rule ID"));
        assert!(!directive.contains("Tags"));
    }

    #[test]
    fn infer_rule_kind_buckets_common_phrasings() {
        // One representative phrasing per bucket, plus an unclassifiable one.
        let cases = [
            ("Never use unwrap() in production code", RuleKind::Correction),
            ("Run prettier before committing", RuleKind::Style),
            ("We use Drizzle ORM for all queries", RuleKind::Convention),
            ("Trust CI for workflow correctness", RuleKind::Slogan),
            ("Some random observation", RuleKind::Other),
        ];
        for (text, expected) in cases {
            assert_eq!(infer_rule_kind(text), expected);
        }
    }
}