cqs 1.26.0

Code intelligence and RAG for AI agents. Semantic search, call graphs, impact analysis, type dependencies, and smart context assembly — in single tool calls. 54 languages + L5X/L5K PLC exports, 91.2% Recall@1 (BGE-large), 0.951 MRR (296 queries). Local ML, GPU-accelerated.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
//! Name matching and boosting logic.

use std::collections::HashSet;

use crate::nl::tokenize_identifier;

use super::config::ScoringConfig;

/// Stack budget for per-candidate name token ranges on the ASCII hot path.
/// Identifiers rarely exceed 8 words; 16 leaves headroom before overflowing
/// to a heap `Vec`. Each entry is `(u32, u32)` = 8 bytes, so the inline buffer
/// costs 128 bytes of stack.
const NAME_TOKEN_STACK: usize = 16;

/// Detect whether a query looks like a code identifier vs natural language.
/// Name-like: "parseConfig", "handle_error", "CircuitBreaker"
/// NL-like: "function that handles errors", "how does parsing work"
/// Used to gate name_boost — boosting by name similarity is harmful for
/// NL queries because it rewards coincidental substring matches over
/// semantic relevance.
pub(crate) fn is_name_like_query(query: &str) -> bool {
    let words: Vec<&str> = query.split_whitespace().collect();
    // Single token or two-token queries are likely identifiers
    if words.len() <= 2 {
        return true;
    }
    // NL indicators: common function words that never appear in identifiers
    const NL_WORDS: &[&str] = &[
        "the",
        "a",
        "an",
        "is",
        "are",
        "was",
        "were",
        "that",
        "which",
        "how",
        "what",
        "where",
        "when",
        "does",
        "do",
        "can",
        "should",
        "would",
        "could",
        "for",
        "with",
        "from",
        "into",
        "this",
        "these",
        "those",
        "function",
        "method",
        "code",
        "implement",
        "find",
        "search",
    ];
    let lower = query.to_lowercase();
    let lower_words: Vec<&str> = lower.split_whitespace().collect();
    for w in &lower_words {
        if NL_WORDS.contains(w) {
            return false;
        }
    }
    // 3+ words with no NL indicators — still likely NL if all lowercase
    // (identifiers are usually camelCase or snake_case)
    if words.len() >= 3 && lower == query && !query.contains('_') {
        return false;
    }
    true
}

/// Pre-tokenized query for efficient name matching in loops.
/// Create once before iterating over search results, then call `score()` for each name.
/// Avoids re-tokenizing the query for every result.
///
/// PF-V1.25-12: on the hot scoring path `score()` avoids all per-candidate
/// `String`/`Vec<String>` allocation for ASCII inputs (the overwhelming
/// common case for code identifiers). Word overlap uses byte-range slices
/// of the candidate name, held in a 16-slot stack buffer. Unicode inputs
/// fall back to `to_lowercase()` + `tokenize_identifier` to preserve the
/// previous behavior.
pub(crate) struct NameMatcher {
    query_lower: String,
    /// True if `query_lower` is pure ASCII — allows byte-wise case-insensitive
    /// comparisons with names, skipping `to_lowercase()` allocation entirely.
    query_is_ascii: bool,
    query_words: Vec<String>,
    /// True if every entry in `query_words` is pure ASCII. Used to gate the
    /// zero-alloc word-overlap path (which relies on byte-level comparisons).
    query_words_ascii: bool,
}

impl NameMatcher {
    /// Create a new matcher with pre-tokenized query
    pub fn new(query: &str) -> Self {
        let query_lower = query.to_lowercase();
        let query_is_ascii = query_lower.is_ascii();
        // tokenize_identifier already lowercases all tokens internally
        let query_words = tokenize_identifier(query);
        let query_words_ascii = query_words.iter().all(|w| w.is_ascii());
        Self {
            query_lower,
            query_is_ascii,
            query_words,
            query_words_ascii,
        }
    }

    /// Compute name match score against pre-tokenized query.
    ///
    /// PF-V1.25-12: exact-match, substring, and word-overlap tiers are all
    /// allocation-free for ASCII inputs (hot path for source code
    /// identifiers). The ASCII word-overlap tier tokenizes `name` into a
    /// stack-resident array of byte ranges and compares against
    /// `self.query_words` bytewise with `eq_ignore_ascii_case` (query words
    /// are already lowercase, so this is effectively direct byte equality).
    /// Tier 2 of the issue (pre-tokenized names stored in DB) would further
    /// eliminate the on-the-fly tokenize pass, but requires a schema
    /// migration.
    pub fn score(&self, name: &str) -> f32 {
        let cfg = &ScoringConfig::DEFAULT;

        // Fast path: both strings ASCII. Byte-wise case-insensitive
        // comparisons skip `to_lowercase()` allocation for the exact and
        // substring tiers.
        let both_ascii = self.query_is_ascii && name.is_ascii();
        if both_ascii {
            if name.eq_ignore_ascii_case(&self.query_lower) {
                return cfg.name_exact;
            }
            if ascii_contains_ignore_case(name, &self.query_lower) {
                return cfg.name_contains;
            }
            if ascii_contains_ignore_case(&self.query_lower, name) {
                return cfg.name_contained_by;
            }
        } else {
            // Slow path: Unicode inputs. Preserve pre-existing behavior by
            // materializing the Unicode-lowercased form for substring checks.
            let name_lower = name.to_lowercase();
            if name_lower == self.query_lower {
                return cfg.name_exact;
            }
            if name_lower.contains(&self.query_lower) {
                return cfg.name_contains;
            }
            if self.query_lower.contains(&name_lower) {
                return cfg.name_contained_by;
            }
        }

        // Word overlap scoring
        if self.query_words.is_empty() {
            return 0.0;
        }

        // ASCII fast path: tokenize `name` into byte-range slices on the
        // stack and compare against already-lowercase `query_words` bytewise.
        // No `Vec<String>` allocation, no `HashSet` allocation. Falls back
        // to the allocating path if the name tokenizes to >16 words (rare)
        // or if either side is Unicode.
        if both_ascii && self.query_words_ascii {
            let mut ranges: [(u32, u32); NAME_TOKEN_STACK] = [(0, 0); NAME_TOKEN_STACK];
            match ascii_tokenize_ranges(name, &mut ranges) {
                Ok(n_ranges) => {
                    if n_ranges == 0 {
                        return 0.0;
                    }
                    let bytes = name.as_bytes();
                    let ranges = &ranges[..n_ranges];
                    return ascii_word_overlap_score(&self.query_words, bytes, ranges, cfg);
                }
                Err(()) => {
                    // Overflowed the stack buffer — fall through to the
                    // allocating path below.
                }
            }
        }

        // Slow path: allocate a Vec<String> of name tokens. Used for Unicode
        // inputs and the rare case of >16 tokens.
        // tokenize_identifier already lowercases all tokens internally.
        let name_words: Vec<String> = tokenize_identifier(name);

        if name_words.is_empty() {
            return 0.0;
        }

        // Build HashSet for O(1) exact match lookup
        let name_word_set: HashSet<&str> = name_words.iter().map(String::as_str).collect();

        // O(m*n) substring matching trade-off:
        // - m = query words (typically 1-5), n = name words (typically 1-5)
        // - Worst case: ~25 comparisons per name, but short-circuits on exact match
        // - Alternative (pre-indexing substring tries) would add complexity for minimal gain
        //   since names are short and search results are already capped by limit
        let overlap = self
            .query_words
            .iter()
            .filter(|w| {
                // Fast path: exact word match
                if name_word_set.contains(w.as_str()) {
                    return true;
                }
                // Slow path: substring matching (only if no exact match)
                // Intentionally excludes equal-length substrings: if lengths are equal
                // but strings differ, they're not substrings of each other (would need
                // exact match, handled above). This avoids redundant contains() calls.
                name_words.iter().any(|nw| {
                    // Short-circuit: check length before expensive substring search
                    (nw.len() > w.len() && nw.contains(w.as_str()))
                        || (w.len() > nw.len() && w.contains(nw.as_str()))
                })
            })
            .count() as f32;
        let total = self.query_words.len().max(1) as f32;

        (overlap / total) * cfg.name_max_overlap
    }
}

/// Compute the word-overlap score for an ASCII candidate using byte-range
/// slices of `name_bytes` rather than owned `String`s.
///
/// `query_words` entries are assumed to be pre-lowercased ASCII (the case
/// produced by `tokenize_identifier` on ASCII queries). Each token of the
/// candidate is a slice `name_bytes[start..end]` — bytes are compared
/// case-insensitively against the already-lowercase query words.
///
/// O(m*n) matches the behavior of the legacy `HashSet`+linear-scan path;
/// for the typical 1–5 query words × 1–5 name words the constant factors
/// of direct byte comparison beat hash-set probing.
fn ascii_word_overlap_score(
    query_words: &[String],
    name_bytes: &[u8],
    name_ranges: &[(u32, u32)],
    cfg: &ScoringConfig,
) -> f32 {
    let overlap = query_words
        .iter()
        .filter(|w| {
            let w_bytes = w.as_bytes();
            // Fast path: exact word match (case-insensitive byte compare).
            if name_ranges.iter().any(|&(s, e)| {
                let slice = &name_bytes[s as usize..e as usize];
                slice.len() == w_bytes.len() && slice.eq_ignore_ascii_case(w_bytes)
            }) {
                return true;
            }
            // Slow path: substring matching (case-insensitive). Excludes
            // equal-length substrings — those can only match exactly, which
            // is handled above.
            name_ranges.iter().any(|&(s, e)| {
                let nw = &name_bytes[s as usize..e as usize];
                (nw.len() > w_bytes.len() && ascii_bytes_contains_ignore_case(nw, w_bytes))
                    || (w_bytes.len() > nw.len() && ascii_bytes_contains_ignore_case(w_bytes, nw))
            })
        })
        .count() as f32;
    let total = query_words.len().max(1) as f32;
    (overlap / total) * cfg.name_max_overlap
}

/// Tokenize an ASCII `&str` into byte-range slices of the input, writing
/// into the caller-provided `out` buffer.
///
/// Matches the semantics of `crate::nl::tokenize_identifier` for pure-ASCII
/// input:
/// - `_`, `-`, and ` ` are delimiters (token boundary, not emitted).
/// - An ASCII uppercase letter starts a new token when the current token is
///   non-empty.
/// - All other characters extend the current token.
///
/// Returns the number of ranges written, or `Err(())` if the name produced
/// more than `out.len()` tokens (caller must fall back to allocating path).
///
/// The returned ranges point at the *raw* bytes of `name`, not lowercased
/// forms — case folding is deferred to the comparison step.
fn ascii_tokenize_ranges(name: &str, out: &mut [(u32, u32)]) -> Result<usize, ()> {
    let bytes = name.as_bytes();
    let mut count = 0usize;
    let mut start: Option<u32> = None;
    let mut i: u32 = 0;
    while (i as usize) < bytes.len() {
        let b = bytes[i as usize];
        let is_delim = b == b'_' || b == b'-' || b == b' ';
        let is_upper = b.is_ascii_uppercase();
        if is_delim {
            if let Some(s) = start.take() {
                if count >= out.len() {
                    return Err(());
                }
                out[count] = (s, i);
                count += 1;
            }
            // Skip delimiter byte; start remains None.
            i += 1;
            continue;
        }
        if is_upper {
            if let Some(s) = start.take() {
                if count >= out.len() {
                    return Err(());
                }
                out[count] = (s, i);
                count += 1;
            }
            start = Some(i);
            i += 1;
            continue;
        }
        // Ordinary character — start a token if one isn't open.
        if start.is_none() {
            start = Some(i);
        }
        i += 1;
    }
    if let Some(s) = start.take() {
        if count >= out.len() {
            return Err(());
        }
        out[count] = (s, i);
        count += 1;
    }
    Ok(count)
}

/// Byte-level analogue of `ascii_contains_ignore_case` used by the ASCII
/// word-overlap path where inputs are already `&[u8]` slices.
fn ascii_bytes_contains_ignore_case(haystack: &[u8], needle: &[u8]) -> bool {
    if needle.is_empty() {
        return true;
    }
    if needle.len() > haystack.len() {
        return false;
    }
    let first = needle[0];
    let last_start = haystack.len() - needle.len();
    for i in 0..=last_start {
        if haystack[i].eq_ignore_ascii_case(&first)
            && haystack[i + 1..i + needle.len()]
                .iter()
                .zip(&needle[1..])
                .all(|(a, b)| a.eq_ignore_ascii_case(b))
        {
            return true;
        }
    }
    false
}

/// Case-insensitive ASCII substring search: is `needle` a substring of `haystack`?
///
/// Both arguments must be ASCII. Bytes are compared with
/// `u8::eq_ignore_ascii_case` on the fly, so callers do not need to
/// pre-lowercase either side. Zero allocation.
///
/// Empty needle matches (mirrors `str::contains("")` semantics).
fn ascii_contains_ignore_case(haystack: &str, needle: &str) -> bool {
    let h = haystack.as_bytes();
    let n = needle.as_bytes();
    if n.is_empty() {
        return true;
    }
    if n.len() > h.len() {
        return false;
    }
    let first = n[0];
    let last_start = h.len() - n.len();
    for i in 0..=last_start {
        if h[i].eq_ignore_ascii_case(&first)
            && h[i + 1..i + n.len()]
                .iter()
                .zip(&n[1..])
                .all(|(a, b)| a.eq_ignore_ascii_case(b))
        {
            return true;
        }
    }
    false
}

/// Compute name match score for hybrid search
/// For repeated calls with the same query, use `NameMatcher::new(query).score(name)` instead.
#[cfg(test)]
pub(crate) fn name_match_score(query: &str, name: &str) -> f32 {
    NameMatcher::new(query).score(name)
}

#[cfg(test)]
mod tests {
    use super::*;

    // ===== name_match_score tests =====

    #[test]
    fn test_name_match_exact() {
        assert_eq!(name_match_score("parse", "parse"), 1.0);
    }

    #[test]
    fn test_name_match_contains() {
        assert_eq!(name_match_score("parse", "parseConfig"), 0.8);
    }

    #[test]
    fn test_name_match_contained() {
        assert_eq!(name_match_score("parseConfigFile", "parse"), 0.6);
    }

    #[test]
    fn test_name_match_partial_overlap() {
        let score = name_match_score("parseConfig", "configParser");
        assert!(score > 0.0 && score <= 0.5);
    }

    #[test]
    fn test_name_match_no_match() {
        assert_eq!(name_match_score("foo", "bar"), 0.0);
    }

    // ===== is_name_like_query tests =====

    #[test]
    fn test_name_like_single_token() {
        assert!(is_name_like_query("parseConfig"));
        assert!(is_name_like_query("CircuitBreaker"));
        assert!(is_name_like_query("handle_error"));
    }

    #[test]
    fn test_name_like_two_tokens() {
        assert!(is_name_like_query("parse config"));
        assert!(is_name_like_query("error handler"));
    }

    #[test]
    fn test_nl_query_with_indicators() {
        assert!(!is_name_like_query("function that handles errors"));
        assert!(!is_name_like_query("how does parsing work"));
        assert!(!is_name_like_query("find error handling code"));
        assert!(!is_name_like_query("code that implements retry logic"));
    }

    #[test]
    fn test_nl_query_all_lowercase_3_plus_words() {
        assert!(!is_name_like_query("error handling retry"));
    }

    #[test]
    fn test_name_like_snake_case_multi() {
        // snake_case with 3+ words is still name-like
        assert!(is_name_like_query("handle_error_retry"));
    }

    // ===== ascii_contains_ignore_case tests =====

    #[test]
    fn test_ascii_contains_ignore_case_basic() {
        assert!(ascii_contains_ignore_case("parseConfig", "parse"));
        assert!(ascii_contains_ignore_case("parseConfig", "Config"));
        assert!(ascii_contains_ignore_case("parseConfig", "config"));
        assert!(ascii_contains_ignore_case("parseConfig", "PARSE"));
        assert!(!ascii_contains_ignore_case("parseConfig", "missing"));
    }

    #[test]
    fn test_ascii_contains_ignore_case_edges() {
        // Empty needle matches (matches std::str::contains behavior)
        assert!(ascii_contains_ignore_case("parse", ""));
        // Needle longer than haystack
        assert!(!ascii_contains_ignore_case("p", "parse"));
        // Exact match counts as substring
        assert!(ascii_contains_ignore_case("parse", "PARSE"));
        // Substring at end
        assert!(ascii_contains_ignore_case("abcdef", "ef"));
        // Substring at start
        assert!(ascii_contains_ignore_case("abcdef", "ab"));
    }

    #[test]
    fn test_ascii_contains_ignore_case_matches_std() {
        // Cross-check against std::str::contains after to_lowercase to confirm
        // we produce identical results on ASCII inputs.
        let cases = [
            ("parseConfig", "parse"),
            ("parseConfig", "Config"),
            ("parseConfig", "xyz"),
            ("CircuitBreaker", "break"),
            ("", ""),
            ("a", "a"),
            ("ab", "ba"),
            ("parse", "parseConfig"),
        ];
        for (h, n) in cases {
            let expected = h.to_lowercase().contains(&n.to_lowercase());
            let got = ascii_contains_ignore_case(h, &n.to_lowercase());
            assert_eq!(got, expected, "mismatch for ({h:?}, {n:?})");
        }
    }

    // ===== Unicode fallback path =====

    #[test]
    fn test_name_match_unicode_fallback_exact() {
        // Non-ASCII path must preserve to_lowercase semantics.
        assert_eq!(name_match_score("获取用户", "获取用户"), 1.0);
    }

    #[test]
    fn test_name_match_unicode_fallback_contains() {
        // "getUser获取" vs "获取": name contains query after to_lowercase
        let score = name_match_score("获取", "getUser获取");
        assert_eq!(score, 0.8);
    }

    // ===== Behavior-equivalence spot checks =====

    /// Mirror the pre-refactor algorithm verbatim so we can confirm the new
    /// implementation produces identical scores across a range of inputs.
    fn legacy_score(query: &str, name: &str) -> f32 {
        let cfg = &ScoringConfig::DEFAULT;
        let query_lower = query.to_lowercase();
        let query_words: Vec<String> = tokenize_identifier(query);
        legacy_score_with_prebuilt(&query_lower, &query_words, name, cfg)
    }

    /// Legacy algorithm with the query pre-tokenized (matches production usage
    /// where `NameMatcher::new` hoists query tokenization out of the candidate
    /// loop). Used by the micro-bench for an apples-to-apples comparison.
    fn legacy_score_with_prebuilt(
        query_lower: &str,
        query_words: &[String],
        name: &str,
        cfg: &ScoringConfig,
    ) -> f32 {
        let name_lower = name.to_lowercase();
        if name_lower == query_lower {
            return cfg.name_exact;
        }
        if name_lower.contains(query_lower) {
            return cfg.name_contains;
        }
        if query_lower.contains(&name_lower) {
            return cfg.name_contained_by;
        }
        if query_words.is_empty() {
            return 0.0;
        }
        let name_words: Vec<String> = tokenize_identifier(name);
        if name_words.is_empty() {
            return 0.0;
        }
        let name_word_set: HashSet<&str> = name_words.iter().map(String::as_str).collect();
        let overlap = query_words
            .iter()
            .filter(|w| {
                if name_word_set.contains(w.as_str()) {
                    return true;
                }
                name_words.iter().any(|nw| {
                    (nw.len() > w.len() && nw.contains(w.as_str()))
                        || (w.len() > nw.len() && w.contains(nw.as_str()))
                })
            })
            .count() as f32;
        let total = query_words.len().max(1) as f32;
        (overlap / total) * cfg.name_max_overlap
    }

    #[test]
    fn test_refactor_matches_legacy() {
        let cases: &[(&str, &str)] = &[
            ("parse", "parse"),
            ("parse", "PARSE"),
            ("parse", "parseConfig"),
            ("parseConfig", "parse"),
            ("parseConfig", "configParser"),
            ("foo", "bar"),
            ("CircuitBreaker", "circuit_breaker"),
            ("handle_error", "handleError"),
            ("xml_parser", "XMLParser"),
            ("", "anything"),
            ("anything", ""),
            ("", ""),
            ("parseConfigFile", "parseConfigFile"),
            ("获取用户", "获取用户"),
            ("获取", "getUser获取"),
            ("get", "GETTER"),
            ("snake_case_id", "snake_case_id"),
            ("a", "ab"),
            ("ab", "a"),
            ("foo bar", "foo_bar"),
        ];
        for (q, n) in cases {
            let expected = legacy_score(q, n);
            let got = name_match_score(q, n);
            assert_eq!(
                got, expected,
                "score mismatch for query={q:?} name={n:?}: got {got} expected {expected}"
            );
        }
    }

    // ===== Micro-benchmark =====
    //
    // PF-V1.25-12: log per-call cost so we can track regressions.
    // Does not assert; just prints elapsed_per_call_ns.
    #[test]
    fn bench_score_hot_path() {
        let matcher = NameMatcher::new("parseConfig");
        // 50-char ASCII candidate name (typical upper bound for real identifiers)
        let name = "parseConfigurationAndHandleErrorForExternalClient";
        assert_eq!(name.len(), 49);

        const ITERS: u32 = 10_000;

        // Refactored path.
        let start = std::time::Instant::now();
        let mut acc: f32 = 0.0;
        for _ in 0..ITERS {
            acc += matcher.score(name);
        }
        let refactor_elapsed = start.elapsed();
        assert!(acc >= 0.0);

        // Legacy path (inline copy, for before/after comparison on the same
        // hardware and build profile). Kept alongside the refactored path so
        // we can detect regressions without cherry-picking old revisions.
        // Query tokenization is hoisted out of the loop to match production's
        // `NameMatcher::new` + per-candidate `score()` shape.
        let cfg = &ScoringConfig::DEFAULT;
        let legacy_query_lower = "parseConfig".to_lowercase();
        let legacy_query_words = tokenize_identifier("parseConfig");
        let start = std::time::Instant::now();
        let mut acc2: f32 = 0.0;
        for _ in 0..ITERS {
            acc2 += legacy_score_with_prebuilt(&legacy_query_lower, &legacy_query_words, name, cfg);
        }
        let legacy_elapsed = start.elapsed();
        assert!(acc2 >= 0.0);

        let refactor_ns = refactor_elapsed.as_nanos() as f64 / ITERS as f64;
        let legacy_ns = legacy_elapsed.as_nanos() as f64 / ITERS as f64;
        eprintln!(
            "bench_score_hot_path: {iters} iters over {candidate_len}-char ASCII name",
            iters = ITERS,
            candidate_len = name.len(),
        );
        eprintln!("  refactored: {refactor_elapsed:?} total, {refactor_ns:.1} ns/call");
        eprintln!("  legacy:     {legacy_elapsed:?} total, {legacy_ns:.1} ns/call");
        eprintln!("  speedup:    {:.2}x", legacy_ns / refactor_ns);
    }

    // ===== ASCII tokenizer tests =====

    fn ranges_to_strings(name: &str, ranges: &[(u32, u32)]) -> Vec<String> {
        ranges
            .iter()
            .map(|&(s, e)| name[s as usize..e as usize].to_ascii_lowercase())
            .collect()
    }

    #[test]
    fn test_ascii_tokenize_ranges_matches_tokenize_identifier() {
        // For ASCII input, the byte-range tokenizer must produce the same
        // tokens (after lowercasing) as `tokenize_identifier`.
        let cases = [
            "parseConfig",
            "parseConfigFile",
            "handle_error",
            "snake_case_id",
            "XMLParser",
            "ABCD",
            "A",
            "",
            "foo bar",
            "foo-bar",
            "a__b",
            "_leading",
            "trailing_",
            "mixCase_and-dashes",
        ];
        for name in cases {
            let mut buf = [(0u32, 0u32); NAME_TOKEN_STACK];
            let n = ascii_tokenize_ranges(name, &mut buf).expect("should fit");
            let got = ranges_to_strings(name, &buf[..n]);
            let expected = tokenize_identifier(name);
            assert_eq!(got, expected, "tokenizer mismatch for {name:?}");
        }
    }

    #[test]
    fn test_ascii_tokenize_ranges_overflow() {
        // Force more tokens than the buffer can hold. Each underscore starts
        // a new token; 20 single-letter tokens overflows the 16-slot buffer.
        let name = "a_b_c_d_e_f_g_h_i_j_k_l_m_n_o_p_q_r_s_t";
        let mut buf = [(0u32, 0u32); NAME_TOKEN_STACK];
        let result = ascii_tokenize_ranges(name, &mut buf);
        assert!(result.is_err(), "expected Err on overflow, got {result:?}");
    }

    #[test]
    fn test_score_overflow_falls_back_to_allocating_path() {
        // When the name has more than NAME_TOKEN_STACK tokens the ASCII fast
        // path must fall through to the allocating path with identical
        // scores. Build a 20-token name and score against a query that hits
        // one token exactly.
        let name = "a_b_c_d_e_f_g_h_i_j_k_l_m_n_o_p_q_r_s_t";
        let q = "p"; // will exact-match token "p"
        let expected = {
            // Run the legacy algorithm to produce the reference score.
            let cfg = &ScoringConfig::DEFAULT;
            let query_lower = q.to_lowercase();
            let query_words: Vec<String> = tokenize_identifier(q);
            legacy_score_with_prebuilt(&query_lower, &query_words, name, cfg)
        };
        let got = name_match_score(q, name);
        assert_eq!(
            got, expected,
            "overflow fallback produced different score ({got} vs {expected})"
        );
        assert!(got > 0.0, "expected positive score for token hit");
    }

    #[test]
    fn test_ascii_bytes_contains_ignore_case() {
        assert!(ascii_bytes_contains_ignore_case(b"parseConfig", b"parse"));
        assert!(ascii_bytes_contains_ignore_case(b"parseConfig", b"CONFIG"));
        assert!(!ascii_bytes_contains_ignore_case(b"parse", b"parseConfig"));
        assert!(ascii_bytes_contains_ignore_case(b"abc", b""));
        assert!(!ascii_bytes_contains_ignore_case(b"", b"a"));
    }
}