llm-kernel 0.9.0

Foundation library for Rust AI-native apps — provider catalog, LLM client, MCP server, search, telemetry, and safety
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
//! Elasticsearch `AsyncVectorIndex` (`elastic` feature).
//!
//! `ElasticsearchVectorIndex` implements [`AsyncVectorIndex`] over a
//! hand-rolled [`reqwest`] client speaking Elasticsearch 8.x's REST API. It is
//! the async counterpart to the in-memory [`VectorIndex`](crate::embedding::VectorIndex)
//! and a sibling of [`QdrantVectorIndex`](crate::embedding::QdrantVectorIndex).
//!
//! # Why hand-rolled reqwest (not the `elasticsearch` crate)?
//!
//! The official `elasticsearch` crate has **no stable release** — every
//! published version is `-alpha.x` (`max_stable_version: None` on crates.io).
//! For a foundation library heading into the v1.0.0 semver lock, an alpha
//! dependency is a blocker. The REST surface this trait needs is small (index
//! create/delete, bulk upsert/delete, knn `_search`, `_count`), so a typed
//! reqwest client reuses the existing `client-async` reqwest dependency and adds
//! zero transitive crates.
//!
//! # Scope
//!
//! Elasticsearch is a *hybrid* engine (BM25 + dense vector). Per the v0.9.0
//! design decision, this implementation exposes only the **dense-vector**
//! contract of [`AsyncVectorIndex`] — native BM25 text search via a
//! [`SearchProvider`](crate::search::SearchProvider) is deferred to a later
//! milestone. The vector results federate cleanly with Qdrant and TurboVec
//! because federation defaults to rank-based RRF (scale-invariant).
//!
//! # Score semantics
//!
//! The `score` field of each [`SearchHit`] carries the Elasticsearch knn
//! `_score`, which for a `cosine`-similarity `dense_vector` field is
//! `(1 + cosine) / 2 ∈ [0, 1]` — *not* the raw cosine that Qdrant reports
//! (`[0, 1]` of a different monotonic map) nor the `[-1, 1]` raw cosine of the
//! in-memory `TurbovecIndex`. Cross-backend score magnitudes are therefore not
//! directly comparable. This is harmless under the federation default
//! (Reciprocal Rank Fusion — rank-based and scale-invariant), but
//! `WeightedSum` federation (behind the optional `federation` feature, which
//! min-max normalizes each list in isolation before a weighted sum) should be
//! used with care across these heterogeneous scales. See the federation
//! module's "Why RRF is the default" docs for the full rationale.

use std::time::Duration;

use anyhow::{Result, anyhow};
use reqwest::header::CONTENT_TYPE;
use serde::Deserialize;

use super::{AsyncVectorIndex, SearchHit};

/// Async vector index backed by an Elasticsearch 8.x index.
///
/// The index is created on construction (a `dense_vector` field with cosine
/// similarity) if it does not already exist. All operations are async over a
/// plain [`reqwest::Client`]. Connection-string credentials embedded in `url`
/// (e.g. `https://user:pass@host`) are used for the request but never leaked in
/// error messages — see [`redact_credentials`].
pub struct ElasticsearchVectorIndex {
    /// HTTP client shared by every request issued through this handle. When
    /// the handle is built via `new`, it carries a 5 s connect timeout and a
    /// 30 s overall request timeout.
    client: reqwest::Client,
    /// Base URL, possibly containing `user:pass@` credentials. Used verbatim
    /// for requests; redacted everywhere else. Stored with trailing `/`
    /// characters trimmed so request paths can be appended directly.
    base_url: String,
    /// Name of the backing Elasticsearch index. `new` checks it with
    /// `validate_index_name` before the handle is created.
    index: String,
    /// Vector dimensionality: used as `dims` in the `dense_vector` mapping
    /// when the index is created, and returned by `dim()`.
    dim: usize,
}

impl ElasticsearchVectorIndex {
    /// Build a handle for `index` on the cluster at `url`
    /// (e.g. `http://localhost:9200`), creating the index if it is missing.
    ///
    /// A freshly created index gets a `dense_vector` field of `dim`
    /// dimensions with cosine similarity.
    ///
    /// # Errors
    ///
    /// Fails when `index` is not an acceptable index name (checked before any
    /// network traffic), when the HTTP client cannot be built, or when the
    /// existence check / index creation fails. Error text never contains
    /// credentials embedded in `url`.
    pub async fn new(url: &str, index: &str, dim: usize) -> Result<Self> {
        validate_index_name(index)?;

        // Trailing slashes are dropped so `base_url + "/path"` never doubles up.
        let base_url = url.trim_end_matches('/').to_owned();

        // Timeouts protect direct (non-federated) callers from a node that
        // never answers; `FederatedSearch` wraps each call in its own
        // `tokio::time::timeout`, but a bare handle has no such outer guard.
        let builder = reqwest::Client::builder()
            .connect_timeout(Duration::from_secs(5))
            .timeout(Duration::from_secs(30));
        let client = match builder.build() {
            Ok(client) => client,
            Err(err) => return Err(anyhow!(redact_credentials(&err.to_string()))),
        };

        let handle = Self {
            client,
            base_url,
            index: index.to_owned(),
            dim,
        };
        handle.ensure_index().await?;
        Ok(handle)
    }

    /// Delete the backing index (handy for test cleanup or a full reset).
    ///
    /// A `404` answer is treated like success: the index being already gone
    /// is the desired end state. Any other non-success status becomes an
    /// error.
    pub async fn delete_index(&self) -> Result<()> {
        let path = format!("/{}", self.index);
        let resp = self.delete(&path).await?;
        let status = resp.status();
        if status.is_success() || status.as_u16() == 404 {
            Ok(())
        } else {
            Err(self.status_err(resp).await)
        }
    }

    /// Create the index with a dense_vector mapping if it does not exist.
    async fn ensure_index(&self) -> Result<()> {
        // HEAD /{index} → 200 if exists, 404 otherwise.
        let head = self
            .client
            .head(format!("{}/{}", &self.base_url, &self.index))
            .send()
            .await
            .map_err(|e| anyhow!(redact_credentials(&e.to_string())))?;
        if head.status().as_u16() == 200 {
            return Ok(());
        }
        // 404 → create. Any other status is an error.
        if head.status().as_u16() != 404 {
            return Err(self.status_err(head).await);
        }
        let body = serde_json::json!({
            "mappings": {
                "properties": {
                    "vector": {
                        "type": "dense_vector",
                        "dims": self.dim,
                        "index": true,
                        "similarity": "cosine"
                    },
                    "ext_id": { "type": "long" }
                }
            }
        });
        let resp = self.put(&format!("/{}", &self.index), body).await?;
        if !resp.status().is_success() {
            return Err(self.status_err(resp).await);
        }
        Ok(())
    }

    /// Turn an Elasticsearch document `_id` into a numeric `u64` id.
    ///
    /// Returns `None` for anything that is not a valid `u64` literal, so
    /// callers can drop such hits (the same policy as `QdrantVectorIndex`).
    /// Pure — unit-testable offline.
    fn parse_id(_id: &str) -> Option<u64> {
        <u64 as std::str::FromStr>::from_str(_id).ok()
    }

    // --- private HTTP helpers (all errors redacted) -----------------------

    /// `PUT {base_url}{path}` with `body` sent as JSON.
    ///
    /// Only transport-level failures become `Err` here (with credentials
    /// redacted); the HTTP status is left for the caller to inspect.
    async fn put(&self, path: &str, body: serde_json::Value) -> Result<reqwest::Response> {
        let target = format!("{}{}", self.base_url, path);
        match self.client.put(target).json(&body).send().await {
            Ok(resp) => Ok(resp),
            Err(err) => Err(anyhow!(redact_credentials(&err.to_string()))),
        }
    }

    /// `DELETE {base_url}{path}` with no request body.
    ///
    /// Only transport-level failures become `Err` here (with credentials
    /// redacted); the HTTP status is left for the caller to inspect.
    async fn delete(&self, path: &str) -> Result<reqwest::Response> {
        let target = format!("{}{}", self.base_url, path);
        match self.client.delete(target).send().await {
            Ok(resp) => Ok(resp),
            Err(err) => Err(anyhow!(redact_credentials(&err.to_string()))),
        }
    }

    /// `POST {base_url}{path}` with `body` sent as-is under the
    /// `application/x-ndjson` content type (the format `_bulk` expects).
    ///
    /// Only transport-level failures become `Err` here (with credentials
    /// redacted); the HTTP status is left for the caller to inspect.
    async fn ndjson(&self, path: &str, body: String) -> Result<reqwest::Response> {
        let target = format!("{}{}", self.base_url, path);
        let request = self
            .client
            .post(target)
            .header(CONTENT_TYPE, "application/x-ndjson")
            .body(body);
        match request.send().await {
            Ok(resp) => Ok(resp),
            Err(err) => Err(anyhow!(redact_credentials(&err.to_string()))),
        }
    }

    /// Convert a non-success response into an error describing the status,
    /// the index name and the (sanitised) response body.
    ///
    /// If the body cannot be read it is treated as empty.
    async fn status_err(&self, resp: reqwest::Response) -> anyhow::Error {
        let status = resp.status();
        let raw = match resp.text().await {
            Ok(text) => text,
            Err(_) => String::new(),
        };
        // Order matters: mask credentials over the whole body first, then cap
        // its length. Truncating first could cut a URL in half and leave a
        // credential fragment that no longer looks like userinfo.
        let masked = redact_credentials(&raw);
        let detail = truncate_error_body(&masked);
        anyhow!(
            "elasticsearch returned status {status} for index `{}` [url redacted]: {}",
            &self.index,
            detail
        )
    }
}

#[async_trait::async_trait]
impl AsyncVectorIndex for ElasticsearchVectorIndex {
    async fn add(&self, vectors: &[Vec<f32>], ids: &[u64]) -> Result<()> {
        if vectors.len() != ids.len() {
            return Err(anyhow!(
                "vectors.len() ({}) must equal ids.len() ({})",
                vectors.len(),
                ids.len()
            ));
        }
        if vectors.is_empty() {
            return Ok(());
        }
        let mut body = String::new();
        for (v, &id) in vectors.iter().zip(ids.iter()) {
            body.push_str(
                &serde_json::to_string(&serde_json::json!({
                    "index": { "_index": &self.index, "_id": id.to_string() }
                }))
                .map_err(|e| anyhow!("bulk encode: {e}"))?,
            );
            body.push('\n');
            body.push_str(
                &serde_json::to_string(&serde_json::json!({
                    "ext_id": id,
                    "vector": v
                }))
                .map_err(|e| anyhow!("bulk encode: {e}"))?,
            );
            body.push('\n');
        }
        // `refresh=wait_for` makes the write immediately searchable, matching
        // Qdrant's `wait(true)` so the conformance test's subsequent searches
        // see the upsert without a race.
        let resp = self.ndjson("/_bulk?refresh=wait_for", body).await?;
        if !resp.status().is_success() {
            return Err(self.status_err(resp).await);
        }
        let parsed: BulkResponse = decode(resp).await?;
        if parsed.errors {
            return Err(anyhow!(
                "elasticsearch bulk upsert reported per-item errors [url redacted]: {}",
                first_failing_bulk_item(&parsed.items)
            ));
        }
        Ok(())
    }

    async fn remove(&self, ids: &[u64]) -> Result<()> {
        if ids.is_empty() {
            return Ok(());
        }
        let mut body = String::new();
        for &id in ids {
            body.push_str(
                &serde_json::to_string(&serde_json::json!({
                    "delete": { "_index": &self.index, "_id": id.to_string() }
                }))
                .map_err(|e| anyhow!("bulk encode: {e}"))?,
            );
            body.push('\n');
        }
        let resp = self.ndjson("/_bulk?refresh=wait_for", body).await?;
        if !resp.status().is_success() {
            return Err(self.status_err(resp).await);
        }
        // Per-item `not_found` for deletes does NOT set `errors: true`, so this
        // mirrors Qdrant's "silently ignore missing ids" contract.
        let parsed: BulkResponse = decode(resp).await?;
        if parsed.errors {
            return Err(anyhow!(
                "elasticsearch bulk delete reported per-item errors [url redacted]: {}",
                first_failing_bulk_item(&parsed.items)
            ));
        }
        Ok(())
    }

    /// kNN search over the `dense_vector` field. Each `SearchHit.score` is the
    /// ES knn `_score` (`(1 + cosine) / 2`), which is not comparable across
    /// backends — see [Score semantics](self#score-semantics).
    ///
    /// Returns at most `k` hits. `k == 0` yields an empty list without
    /// contacting Elasticsearch. Hits whose `_id` is not a numeric `u64` are
    /// dropped.
    ///
    /// # Errors
    ///
    /// Fails when the request fails, Elasticsearch answers with a non-success
    /// status, or the response body cannot be decoded.
    async fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchHit>> {
        // Elasticsearch rejects a knn clause with `k < 1`, which would surface
        // as an opaque 400. Asking for zero neighbours has a well-defined
        // answer, so return it directly.
        if k == 0 {
            return Ok(Vec::new());
        }
        let body = serde_json::json!({
            "knn": {
                "field": "vector",
                "query_vector": query,
                "k": k,
                "num_candidates": knn_num_candidates(k)
            },
            // Only `_id` and `_score` are needed; skip the stored vectors.
            "_source": false,
            "size": k
        });
        let resp = self
            .client
            .post(format!("{}/{}/_search", &self.base_url, &self.index))
            .json(&body)
            .send()
            .await
            .map_err(|e| anyhow!(redact_credentials(&e.to_string())))?;
        if !resp.status().is_success() {
            return Err(self.status_err(resp).await);
        }
        let parsed: SearchResponse = decode(resp).await?;
        Ok(parsed
            .hits
            .hits
            .into_iter()
            .filter_map(|h| {
                Self::parse_id(&h._id).map(|id| SearchHit {
                    id,
                    score: h._score,
                })
            })
            .collect())
    }

    /// kNN search restricted to documents whose `ext_id` is in `allowlist`.
    ///
    /// Returns at most `k` hits. An empty `allowlist` or `k == 0` yields an
    /// empty list without contacting Elasticsearch. Hits whose `_id` is not a
    /// numeric `u64` are dropped. Scores follow the same semantics as
    /// `search`.
    ///
    /// # Errors
    ///
    /// Fails when the request fails, Elasticsearch answers with a non-success
    /// status, or the response body cannot be decoded.
    async fn search_filtered(
        &self,
        query: &[f32],
        k: usize,
        allowlist: &[u64],
    ) -> Result<Vec<SearchHit>> {
        // An empty allowlist excludes every document (no candidates) → empty,
        // with NO fallback to an unfiltered search. Mirrors
        // `QdrantVectorIndex::search_filtered` exactly.
        if allowlist.is_empty() {
            return Ok(vec![]);
        }
        // Elasticsearch rejects a knn clause with `k < 1`, which would surface
        // as an opaque 400. Asking for zero neighbours has a well-defined
        // answer, so return it directly.
        if k == 0 {
            return Ok(vec![]);
        }
        let body = serde_json::json!({
            "knn": {
                "field": "vector",
                "query_vector": query,
                "k": k,
                "num_candidates": knn_num_candidates(k),
                // The borrowed slice serializes directly; no copy is needed.
                "filter": [{ "terms": { "ext_id": allowlist } }]
            },
            // Only `_id` and `_score` are needed; skip the stored vectors.
            "_source": false,
            "size": k
        });
        let resp = self
            .client
            .post(format!("{}/{}/_search", &self.base_url, &self.index))
            .json(&body)
            .send()
            .await
            .map_err(|e| anyhow!(redact_credentials(&e.to_string())))?;
        if !resp.status().is_success() {
            return Err(self.status_err(resp).await);
        }
        let parsed: SearchResponse = decode(resp).await?;
        Ok(parsed
            .hits
            .hits
            .into_iter()
            .filter_map(|h| {
                Self::parse_id(&h._id).map(|id| SearchHit {
                    id,
                    score: h._score,
                })
            })
            .collect())
    }

    /// Number of documents in the backing index, as reported by `_count`.
    ///
    /// # Errors
    ///
    /// Fails when the request fails, Elasticsearch answers with a non-success
    /// status, the response cannot be decoded, or the reported count does not
    /// fit in `usize` on the current target.
    async fn len(&self) -> Result<usize> {
        let resp = self
            .client
            .post(format!("{}/{}/_count", &self.base_url, &self.index))
            .json(&serde_json::json!({}))
            .send()
            .await
            .map_err(|e| anyhow!(redact_credentials(&e.to_string())))?;
        if !resp.status().is_success() {
            return Err(self.status_err(resp).await);
        }
        let parsed: CountResponse = decode(resp).await?;
        // A plain `as usize` would silently wrap on targets where `usize` is
        // narrower than 64 bits; report that case instead of lying.
        usize::try_from(parsed.count).map_err(|_| {
            anyhow!(
                "elasticsearch document count {} does not fit in usize on this target",
                parsed.count
            )
        })
    }

    /// Vector dimensionality this handle was constructed with. This is a
    /// local value; no request is made to Elasticsearch.
    fn dim(&self) -> usize {
        self.dim
    }
}

/// Mask the `user:pass@` userinfo of every URL found inside `s`.
///
/// Elasticsearch connection strings often carry basic-auth credentials
/// (`https://user:pass@host`). Errors produced by this module pass through
/// this function so those credentials do not end up in messages or logs.
/// Pure — unit-testable offline.
///
/// How a URL is recognised: each `://` starts an authority, which runs up to
/// the first `/`, `?` or `#` (or the end of the text). If the authority
/// contains an `@`, everything before the **last** `@` is replaced with
/// `<redacted>` — so a password that itself contains `@`
/// (`https://u:p@ss@host`) is masked in full. Text outside authorities is
/// copied unchanged. All delimiters are ASCII, so every slice boundary is a
/// valid UTF-8 character boundary.
pub fn redact_credentials(s: &str) -> String {
    const SCHEME_SEP: &str = "://";

    let mut masked = String::with_capacity(s.len());
    let mut remaining = s;

    while let Some(sep_at) = remaining.find(SCHEME_SEP) {
        // Everything up to and including "://" is copied verbatim.
        let (head, tail) = remaining.split_at(sep_at + SCHEME_SEP.len());
        masked.push_str(head);

        // Authority = text up to the first path/query/fragment delimiter.
        let authority_len = tail.find(['/', '?', '#']).unwrap_or(tail.len());
        let (authority, beyond) = tail.split_at(authority_len);

        // Userinfo = everything before the LAST '@' inside the authority.
        match authority.rsplit_once('@') {
            Some((_userinfo, host)) => {
                masked.push_str("<redacted>@");
                masked.push_str(host);
            }
            None => masked.push_str(authority),
        }

        remaining = beyond;
    }

    // No further "://": the rest cannot contain a URL authority.
    masked.push_str(remaining);
    masked
}

/// Ceiling applied to the heuristic knn `num_candidates` (`10 * k`).
///
/// `num_candidates` is scaled with `k` using the common `10 * k` heuristic.
/// Left unbounded, a large `k` would translate into a very large candidate
/// pool (e.g. `k = 1_000` would ask for 10 000 candidates), which is an
/// unreasonable default load for a foundation library. The ceiling only
/// limits the heuristic: when `k` itself is larger than the ceiling, `k`
/// is used instead (see [`knn_num_candidates`]).
const MAX_KNN_CANDIDATES: usize = 1_000;

/// Choose the knn `num_candidates` for a query that returns the top `k` hits.
///
/// The result is `max(k, min(10 * max(k, 1), MAX_KNN_CANDIDATES))`:
///
/// * the heuristic pool is ten times `k`, computed with saturating
///   arithmetic so a huge `k` cannot overflow;
/// * `k == 0` is treated as `1` for the heuristic, giving `10`;
/// * the pool is capped at [`MAX_KNN_CANDIDATES`];
/// * the result is never smaller than `k`, because ES requires
///   `num_candidates >= k` (it cannot return `k` neighbours from fewer than
///   `k` candidates).
///
/// Pure — unit-testable offline.
fn knn_num_candidates(k: usize) -> usize {
    let heuristic = k.max(1).saturating_mul(10);
    let capped = std::cmp::min(heuristic, MAX_KNN_CANDIDATES);
    std::cmp::max(capped, k)
}

/// Largest number of characters of an ES error response body that is copied
/// into an [`anyhow::Error`].
///
/// ES error bodies can be very long (e.g. a verbose
/// `mapper_parsing_exception`); embedding them whole would bloat logs and
/// error chains. Longer bodies are cut and tagged with `... [truncated]` so
/// the reader knows more detail exists on the ES side.
const ERROR_BODY_MAX_CHARS: usize = 1024;

/// Limit `s` to [`ERROR_BODY_MAX_CHARS`] characters.
///
/// Strings within the limit are returned unchanged. Longer strings are cut
/// after exactly [`ERROR_BODY_MAX_CHARS`] characters and `... [truncated]`
/// is appended. The limit counts characters, not bytes, and the cut always
/// falls on a character boundary, so multibyte text is handled safely.
///
/// Meant to run AFTER [`redact_credentials`], so a credential located beyond
/// the cut has already been masked. Pure — unit-testable offline.
fn truncate_error_body(s: &str) -> String {
    // The byte offset of character number `ERROR_BODY_MAX_CHARS` (0-based)
    // exists only when the string is longer than the limit, and is exactly
    // where the kept prefix ends.
    match s.char_indices().nth(ERROR_BODY_MAX_CHARS) {
        None => s.to_owned(),
        Some((cut, _)) => {
            let mut capped = String::from(&s[..cut]);
            capped.push_str("... [truncated]");
            capped
        }
    }
}

/// Check an index name before it is sent to Elasticsearch.
///
/// A name is accepted only when all of the following hold:
///
/// * it is not empty;
/// * it is not the literal `.` or `..`;
/// * it is at most 255 bytes long;
/// * it does not begin with `_`, `-` or `+`;
/// * every byte is one of `a-z`, `0-9`, `_`, `-`, `.` (so uppercase letters
///   and any non-ASCII text are refused).
///
/// A leading `.` followed by more characters (e.g. `.myindex`) passes; such
/// names are reserved for hidden/system indices, so they are allowed but
/// discouraged. Checking up front replaces ES's opaque
/// `invalid_index_name_exception` 400 with a clear `Err` before any network
/// call. Pure — unit-testable offline.
fn validate_index_name(index: &str) -> Result<()> {
    if index.is_empty() {
        return Err(anyhow!("elasticsearch index name must not be empty"));
    }

    // "." and ".." are refused outright; this is separate from the
    // leading-dot allowance for names such as `.myindex`.
    if matches!(index, "." | "..") {
        return Err(anyhow!(
            "elasticsearch index name must not be `.` or `..` (reserved): `{}`",
            index
        ));
    }

    let byte_len = index.len();
    if byte_len > 255 {
        return Err(anyhow!(
            "elasticsearch index name exceeds 255 bytes ({} bytes)",
            byte_len
        ));
    }

    if index.starts_with(['_', '-', '+']) {
        return Err(anyhow!(
            "elasticsearch index name must not start with `_`, `-`, or `+`: `{}`",
            index
        ));
    }

    let is_legal = |b: u8| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'_' | b'-' | b'.');
    match index.bytes().find(|&b| !is_legal(b)) {
        Some(bad) => Err(anyhow!(
            "elasticsearch index name contains an illegal byte 0x{bad:02x} (`{}`): \
             only lowercase a-z, 0-9, `_`, `-`, `.` are allowed",
            index
        )),
        None => Ok(()),
    }
}

/// Describe the first failed entry of an ES `_bulk` response, with any
/// embedded credentials redacted.
///
/// A bulk entry looks like
/// `{ "<action>": { "_id": …, "status": N, "error": {…} } }`, where
/// `<action>` is `index`/`create`/`update`/`delete`. The entry is considered
/// failed when the detail object under its first key has `status >= 400` or
/// contains an `error` key. Entries are inspected as opaque JSON so the
/// check does not depend on a version-specific item shape; entries that are
/// not objects, or are empty objects, are skipped.
///
/// Returns the failed entry serialized as JSON, or the fixed text
/// `(no failing item found)` when no entry qualifies.
fn first_failing_bulk_item(items: &[serde_json::Value]) -> String {
    let failed = |item: &serde_json::Value| -> bool {
        match item.as_object().and_then(|actions| actions.values().next()) {
            Some(detail) => {
                let status = detail
                    .get("status")
                    .and_then(serde_json::Value::as_i64)
                    .unwrap_or(0);
                status >= 400 || detail.get("error").is_some()
            }
            None => false,
        }
    };

    match items.iter().find(|item| failed(item)) {
        Some(item) => redact_credentials(&item.to_string()),
        None => "(no failing item found)".into(),
    }
}

/// Read the response body as JSON and deserialize it into `T`.
///
/// A read or parse failure is returned as an error whose message has passed
/// through [`redact_credentials`], so a URL quoted in it cannot leak userinfo.
async fn decode<T>(resp: reqwest::Response) -> Result<T>
where
    T: serde::de::DeserializeOwned,
{
    match resp.json::<T>().await {
        Ok(value) => Ok(value),
        Err(err) => Err(anyhow!(redact_credentials(&err.to_string()))),
    }
}

/// The part of a `_search` response body this module reads: only the
/// top-level `hits` object is deserialized.
#[derive(Deserialize)]
struct SearchResponse {
    /// Top-level `hits` object wrapping the list of matches.
    hits: SearchHits,
}

/// The `hits` object of a `_search` response; only its inner `hits` array is
/// deserialized.
#[derive(Deserialize)]
struct SearchHits {
    /// Individual matches, kept in the order they appear in the response.
    hits: Vec<SearchInnerHit>,
}

/// One entry of the `hits.hits` array. The field names carry a leading
/// underscore because they are matched against the JSON keys as-is (no serde
/// rename is applied).
#[derive(Deserialize)]
struct SearchInnerHit {
    /// Document id as a string; converted to `u64` via `parse_id` by callers.
    _id: String,
    /// Relevance score reported for the hit; copied into `SearchHit::score`.
    _score: f32,
}

/// The part of a `_count` response body this module reads.
#[derive(Deserialize)]
struct CountResponse {
    /// Number of documents reported by the `_count` request.
    count: u64,
}

/// The part of a `_bulk` response body this module reads.
#[derive(Deserialize)]
struct BulkResponse {
    /// `true` when the response flags per-item errors; `add` and `remove`
    /// turn that into an `Err`.
    errors: bool,
    /// Per-action results, kept as opaque JSON and inspected by
    /// `first_failing_bulk_item`. Defaults to an empty list when the key is
    /// absent from the response.
    #[serde(default)]
    items: Vec<serde_json::Value>,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::embedding::AsyncVectorIndex;

    const DIM: usize = 4;

    /// Name for the throwaway test index: the `llm_kernel_test_` prefix
    /// followed by the current process id.
    fn unique_index() -> String {
        let mut name = String::from("llm_kernel_test_");
        name.push_str(&std::process::id().to_string());
        name
    }

    /// Assemble an `ElasticsearchVectorIndex` straight from its fields, with
    /// no connection attempt and no `ensure_index` call.
    ///
    /// Trailing `/` characters are stripped from `base_url`, and the index
    /// name is fixed to `llm_kernel_test_offline`. Meant for unit-testing the
    /// pure code paths that run before any HTTP request (empty-allowlist
    /// short-circuit, redaction, id parsing) without an ES server.
    fn offline_index(base_url: &str, dim: usize) -> ElasticsearchVectorIndex {
        let client = reqwest::Client::new();
        let base_url = base_url.trim_end_matches('/').to_owned();
        let index = String::from("llm_kernel_test_offline");
        ElasticsearchVectorIndex {
            client,
            base_url,
            index,
            dim,
        }
    }

    /// `parse_id` turns decimal strings into `u64` ids and yields `None` for
    /// everything else.
    #[test]
    fn parse_id_accepts_numeric_and_drops_rest() {
        // Decimal strings, up to and including the largest u64, are parsed.
        let numeric = [("42", 42), ("0", 0), ("18446744073709551615", u64::MAX)];
        for (raw, want) in numeric {
            assert_eq!(ElasticsearchVectorIndex::parse_id(raw), Some(want));
        }

        // Non-numeric ids (ES can return string ids) are dropped.
        for raw in ["abc", "", "1.5"] {
            assert_eq!(ElasticsearchVectorIndex::parse_id(raw), None);
        }
    }

    /// `redact_credentials` replaces URL userinfo with `<redacted>` and leaves
    /// all other text — including non-ASCII — untouched.
    #[test]
    fn redact_credentials_strips_userinfo() {
        // (raw text, expected text after redaction)
        let table: [(&str, &str); 7] = [
            ("http://u:pw@host:9200", "http://<redacted>@host:9200"),
            (
                "https://elastic:secret@es.local/x",
                "https://<redacted>@es.local/x",
            ),
            // No userinfo: returned as-is.
            ("http://localhost:9200", "http://localhost:9200"),
            // User name without a password is masked too.
            ("http://user@host", "http://<redacted>@host"),
            // A password containing '@': the userinfo extends to the LAST '@',
            // so nothing after the first '@' leaks.
            ("https://u:p@ss@host:9200", "https://<redacted>@host:9200"),
            ("no url here", "no url here"),
            // Multibyte UTF-8 must come through intact (guards against a
            // byte-wise redaction that would corrupt non-ASCII text).
            (
                "index 中文 — http://u:pw@h:9200",
                "index 中文 — http://<redacted>@h:9200",
            ),
        ];
        table.iter().for_each(|&(raw, want)| {
            assert_eq!(redact_credentials(raw), want, "input = {raw:?}");
        });

        // The password itself never survives.
        let simple = redact_credentials("https://u:secret@host");
        assert!(!simple.contains("secret"));

        // Nor does any fragment of a password with an embedded '@'.
        let tricky = redact_credentials("https://u:p@ss@host:9200");
        for fragment in ["p@ss", "ss@"] {
            assert!(
                !tricky.contains(fragment),
                "password tail leaked: {tricky}"
            );
        }
    }

    /// `validate_index_name` accepts lowercase names built from a-z, 0-9, `_`,
    /// `-`, `.` and rejects everything else covered below, including names
    /// longer than 255 bytes.
    #[test]
    fn validate_index_name_accepts_and_rejects() {
        let accepted = ["docs", "docs_v2", "my-index", "idx.2026", "a", "a.b-c_d"];
        for ok in accepted {
            let verdict = validate_index_name(ok);
            assert!(verdict.is_ok(), "{ok:?} should be a valid index name");
        }

        let rejected = [
            "",            // empty
            "Docs",        // uppercase
            "with space",  // space
            "comma,idx",   // comma
            "_underscore", // leading _
            "-dash",       // leading -
            "+plus",       // leading +
            "bad/slash",   // slash
            "한글",        // non-ASCII
            ".",           // reserved literal
            "..",          // reserved literal
        ];
        for name in rejected {
            let verdict = validate_index_name(name);
            assert!(verdict.is_err(), "{name:?} should be rejected");
        }

        // Length limit: 255 bytes pass, 256 do not.
        let at_limit = "a".repeat(255);
        let over_limit = "a".repeat(256);
        assert!(validate_index_name(&at_limit).is_ok());
        assert!(validate_index_name(&over_limit).is_err());
    }

    /// `knn_num_candidates` grows as 10x `k`, stops at `MAX_KNN_CANDIDATES`,
    /// and is expected to stay `>= k` (the ES `num_candidates >= k`
    /// invariant). Pure.
    ///
    /// NOTE(review): every `k` used here is below the cap, so the `>= k`
    /// check only runs where the cap already exceeds `k` — consider adding a
    /// case with `k > MAX_KNN_CANDIDATES` once the intended result is
    /// confirmed.
    #[test]
    fn knn_num_candidates_scales_caps_and_floors() {
        // Below the cap the result is exactly ten times k.
        for (k, want) in [(1, 10), (5, 50), (50, 500)] {
            assert_eq!(knn_num_candidates(k), want);
        }

        // 10 * 100 = 1000 lands exactly on the cap.
        assert_eq!(knn_num_candidates(100), MAX_KNN_CANDIDATES);

        // 10 * 200 would overshoot: the result is pinned to the cap and is
        // still no smaller than k.
        let clamped = knn_num_candidates(200);
        assert_eq!(clamped, MAX_KNN_CANDIDATES);
        assert!(clamped >= 200);

        // k == 0 must not underflow; the result is 10.
        assert_eq!(knn_num_candidates(0), 10);
    }

    /// Bodies at or under the cap come back untouched, with no truncation
    /// marker. Pure.
    #[test]
    fn truncate_error_body_leaves_short_body_unchanged() {
        for body in ["", "short error"] {
            assert_eq!(truncate_error_body(body), body);
        }

        // A body of exactly ERROR_BODY_MAX_CHARS chars is still not truncated.
        let boundary = "a".repeat(ERROR_BODY_MAX_CHARS);
        let kept = truncate_error_body(&boundary);
        assert_eq!(kept.chars().count(), ERROR_BODY_MAX_CHARS);
        assert!(!kept.contains("[truncated]"));
    }

    /// A body longer than the cap is cut at a char boundary and gets the
    /// truncation marker. Multibyte text must not panic or split a codepoint.
    /// Pure.
    #[test]
    fn truncate_error_body_caps_huge_body_with_marker() {
        // ASCII over-cap: cut to exactly ERROR_BODY_MAX_CHARS chars + marker.
        let huge: String = "a".repeat(ERROR_BODY_MAX_CHARS + 500);
        let out = truncate_error_body(&huge);
        assert!(out.ends_with("... [truncated]"));
        let kept = out.strip_suffix("... [truncated]").unwrap();
        assert_eq!(kept.chars().count(), ERROR_BODY_MAX_CHARS);

        // Multibyte (CJK) over-cap: truncation must land on a char boundary.
        // Each '中' is 3 bytes in UTF-8, so a byte-wise cut at an arbitrary
        // offset would fall inside a codepoint. The repeated literal must be
        // a real multibyte character: an empty literal would produce an empty
        // body and never reach the truncation path at all.
        let cjk: String = "中".repeat(ERROR_BODY_MAX_CHARS + 10);
        assert!(cjk.chars().count() > ERROR_BODY_MAX_CHARS);
        let out_cjk = truncate_error_body(&cjk);
        // Reaching this point without a panic means the cut was char-boundary
        // safe. The marker must be present...
        assert!(out_cjk.contains("[truncated]"));
        // ...and the retained prefix must still begin with an intact
        // character from the input rather than a mangled one.
        assert!(
            out_cjk.starts_with('中'),
            "kept prefix does not start with the original character"
        );
    }

    /// Credentials stay masked through truncation: `redact_credentials` runs
    /// BEFORE `truncate_error_body`, so whatever part of the body survives the
    /// cut is already redacted. Pure — simulates the `status_err`
    /// redact→truncate order.
    #[test]
    fn truncate_error_body_keeps_credentials_redacted() {
        // A body shorter than the cap but with an embedded credential URL:
        // redaction applies, truncation is a no-op, credential is gone.
        let with_cred = "error: see https://u:super-secret@host/idx for details";
        let out = truncate_error_body(&redact_credentials(with_cred));
        assert!(!out.contains("super-secret"), "credential leaked: {out}");
        assert!(out.contains("<redacted>"));

        // A body LONGER than the cap with the credential URL near the END
        // (past the cut point). Truncation alone would drop the URL, so the
        // redacted intermediate is checked as well — otherwise this case
        // would pass even if redaction did nothing.
        let padding: String = "x".repeat(ERROR_BODY_MAX_CHARS + 50);
        let long_cred = format!("{padding} then https://u:p@ss@host:9200");
        let redacted = redact_credentials(&long_cred);
        assert!(
            !redacted.contains("p@ss"),
            "credential survived redaction of a long body"
        );
        assert!(redacted.contains("<redacted>"));
        let out2 = truncate_error_body(&redacted);
        assert!(!out2.contains("p@ss"), "credential tail leaked: {out2}");

        // A body longer than the cap with the credential URL near the START
        // (before the cut point): the masked form is what survives the cut.
        let early_cred = format!("see https://u:p@ss@host:9200 {padding}");
        let out3 = truncate_error_body(&redact_credentials(&early_cred));
        assert!(out3.contains("[truncated]"));
        assert!(out3.contains("<redacted>"));
        assert!(!out3.contains("p@ss"), "credential leaked: {out3}");
    }

    /// `first_failing_bulk_item` reports the first item that has a status
    /// `>= 400` OR an `error` key, masks any credentialed URL inside it, and
    /// returns a fixed fallback when no item qualifies. Pure — runs offline.
    #[test]
    fn first_failing_bulk_item_picks_failing_and_redacts() {
        let healthy = serde_json::json!({ "index": { "_id": "1", "status": 200 } });

        // A healthy item followed by a failing one (status 400 + error): the
        // failing one is what gets rendered.
        let rejected = serde_json::json!({
            "index": { "_id": "2", "status": 400, "error": { "type": "mapper", "reason": "bad" } }
        });
        let rendered = first_failing_bulk_item(&[healthy.clone(), rejected]);
        assert!(
            rendered.contains("\"_id\":\"2\""),
            "should name the failing item: {rendered}"
        );
        assert!(rendered.contains("400"));

        // An `error` key without any status field is enough to qualify.
        let error_only = serde_json::json!({
            "delete": { "_id": "9", "error": { "type": "x", "reason": "y" } }
        });
        let rendered_error_only = first_failing_bulk_item(&[error_only]);
        assert!(rendered_error_only.contains("\"_id\":\"9\""));

        // A credentialed URL inside the item JSON is masked.
        let with_url = serde_json::json!({
            "index": { "_id": "3", "status": 500, "error": { "reason": "see https://u:secret@host" } }
        });
        let masked = first_failing_bulk_item(&[with_url]);
        assert!(!masked.contains("secret"), "credential leaked: {masked}");
        assert!(masked.contains("<redacted>"));

        // Nothing failing → the fallback string.
        assert_eq!(
            first_failing_bulk_item(&[healthy]),
            "(no failing item found)"
        );
    }

    /// AC3: error text built around a credentialed URL must not expose the
    /// password once it has been through `redact_credentials`. Mimics a
    /// reqwest-style error message, so no live connection is needed.
    #[test]
    fn credentialed_url_error_redacts_password() {
        let endpoint = "https://elastic:super-secret-pw@es.internal:9200/idx";
        // Shape of the text the module redacts: a transport error that quotes
        // the request URL.
        let message = redact_credentials(&format!(
            "error sending request for url ({endpoint}): connection refused"
        ));
        assert!(
            !message.contains("super-secret-pw"),
            "password leaked in redacted error: {message}"
        );
        assert!(message.contains("<redacted>"));
    }

    /// AC3: `search_filtered` with an empty allowlist yields an empty result
    /// before any HTTP request is made. The handle targets an address where
    /// nothing listens, so an attempted request would surface as an error.
    #[tokio::test]
    async fn empty_allowlist_returns_empty_without_network() {
        // Built offline: `ensure_index` never ran and port 1 has no server.
        let handle = offline_index("http://0.0.0.0:1", DIM);
        let outcome = handle
            .search_filtered(&[1.0, 0.0, 0.0, 0.0], 5, &[])
            .await;
        assert!(
            outcome.is_ok(),
            "empty allowlist must not error: {outcome:?}"
        );
        let hits = outcome.unwrap();
        assert!(hits.is_empty());
    }

    /// Body of the live conformance check. Every failed expectation is
    /// returned as an `Err` instead of panicking, so the caller gets control
    /// back and can delete the throwaway index whatever the outcome.
    ///
    /// The steps run strictly in order against `idx`: dimension and emptiness
    /// checks, adding two vectors, plain and filtered search, re-adding an
    /// existing id, then removal.
    async fn run_live_conformance(idx: &ElasticsearchVectorIndex) -> Result<()> {
        // Turn a failed expectation into an error carrying `what`.
        let require = |ok: bool, what: &'static str| -> Result<()> {
            if ok {
                Ok(())
            } else {
                Err(anyhow!(what))
            }
        };

        require(idx.dim() == DIM, "dim mismatch")?;
        require(idx.is_empty().await?, "not empty at start")?;

        // Two orthogonal unit vectors under ids 1 and 2.
        idx.add(
            &[vec![1.0, 0.0, 0.0, 0.0], vec![0.0, 1.0, 0.0, 0.0]],
            &[1, 2],
        )
        .await?;
        require(idx.len().await? == 2, "len != 2 after add")?;

        // Querying with id 1's own vector must return id 1 as the top hit.
        let nearest = idx.search(&[1.0, 0.0, 0.0, 0.0], 1).await?;
        require(
            nearest.len() == 1 && nearest[0].id == 1,
            "nearest neighbor != id 1",
        )?;

        // With the allowlist restricted to id 2, only id 2 may come back.
        let allowed = idx.search_filtered(&[1.0, 0.0, 0.0, 0.0], 2, &[2]).await?;
        require(
            allowed.len() == 1 && allowed[0].id == 2,
            "filtered search != id 2",
        )?;

        // Adding id 1 again with a different vector replaces it: still 2 docs.
        idx.add(&[vec![0.9, 0.1, 0.0, 0.0]], &[1]).await?;
        require(idx.len().await? == 2, "len != 2 after re-add")?;

        idx.remove(&[1]).await?;
        require(idx.len().await? == 1, "len != 1 after remove")?;

        // The removed id must no longer appear in search results.
        let remaining = idx.search(&[1.0, 0.0, 0.0, 0.0], 5).await?;
        require(
            remaining.iter().all(|hit| hit.id != 1),
            "id 1 still present after remove",
        )
    }

    /// Conformance run against a real Elasticsearch; skipped (with a note on
    /// stderr) when `LLMKERNEL_ELASTIC_URL` is not set.
    ///
    /// The throwaway index is deleted after the conformance body returns,
    /// whether it returned `Ok` or `Err`, and only then is a failure raised.
    /// This relies on the body reporting failures as errors: a panic inside
    /// it would bypass the cleanup.
    #[tokio::test]
    async fn live_elastic_conformance() {
        let url = if let Ok(value) = std::env::var("LLMKERNEL_ELASTIC_URL") {
            value
        } else {
            eprintln!("skipped: LLMKERNEL_ELASTIC_URL unset (no live Elasticsearch)");
            return;
        };

        let index = unique_index();
        let idx = ElasticsearchVectorIndex::new(&url, &index, DIM)
            .await
            .unwrap_or_else(|e| panic!("connect + create index: {e:?}"));

        // Run first, clean up second, fail last. The delete is best-effort,
        // so its own result is deliberately discarded.
        let outcome = run_live_conformance(&idx).await;
        let _ = idx.delete_index().await;
        outcome.expect("elasticsearch conformance failed");
    }
}