embedrs 0.3.3

Unified embedding — cloud APIs (OpenAI, Cohere, Gemini, Voyage, Jina) + local inference, one interface
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
/// benchmark: rigorous comparison of local models vs cloud providers (OpenAI, Gemini, Cohere, Voyage)
///
/// methodology:
///   1. graded similarity — 32 pairs with human scores (0-5), spearman correlation
///   2. retrieval accuracy — query→document matching, top-1/top-3 accuracy
///   3. multilingual — english, chinese, japanese breakdown
///   4. cross-lingual alignment — same sentence across languages
///   5. text length sensitivity — short/medium/long text quality
///   6. robustness — typos, casing, word order variations
///   7. clustering quality — topic grouping accuracy
///   8. throughput — 500 texts batch processing speed
///
/// run: cargo run -p embedrs --features local --example benchmark --release
use std::time::Instant;

/// Entry point: builds the shared corpus for tests 1-7, then benchmarks every local model and
/// every cloud provider whose API key is present in the environment.
///
/// API keys are read from `OPENAI_API_KEY`, `GEMINI_API_KEY`, `COHERE_API_KEY` and
/// `VOYAGE_API_KEY`. Before that, `.env.local` files are loaded on a best-effort basis: first
/// relative to the current directory, then from two levels above the crate manifest directory.
///
/// A provider that fails is reported on stdout and skipped; it does not abort the other runs.
#[tokio::main]
async fn main() -> embedrs::Result<()> {
    // best-effort: a missing or unreadable .env.local is not an error
    dotenvy::from_filename(".env.local").ok();
    // Two levels above the crate manifest (presumably the workspace root). Skipped instead of
    // panicking when the manifest directory has fewer than two ancestors.
    if let Some(root) = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .ancestors()
        .nth(2)
    {
        dotenvy::from_filename(root.join(".env.local")).ok();
    }

    let openai_key = std::env::var("OPENAI_API_KEY").ok();
    let gemini_key = std::env::var("GEMINI_API_KEY").ok();
    let cohere_key = std::env::var("COHERE_API_KEY").ok();
    let voyage_key = std::env::var("VOYAGE_API_KEY").ok();

    println!("╔══════════════════════════════════════════════╗");
    println!("║       embedrs benchmark suite v1.0           ║");
    println!("╚══════════════════════════════════════════════╝\n");

    // collect all texts for tests 1-7
    let similarity_pairs = graded_similarity_pairs();
    let retrieval_sets = retrieval_test_sets();
    let crosslingual_groups = crosslingual_groups();
    let length_pairs = length_sensitivity_pairs();
    let robustness_pairs = robustness_pairs();
    let cluster_groups = cluster_groups();

    // Insertion-ordered, de-duplicated corpus: a text shared by several tests is embedded once.
    let mut all_texts: Vec<String> = Vec::new();
    let mut add = |s: &str| {
        // compare against the borrowed text so duplicates cost no allocation
        if !all_texts.iter().any(|t| t == s) {
            all_texts.push(s.to_string());
        }
    };

    for (a, b, _) in &similarity_pairs {
        add(a);
        add(b);
    }
    for (query, candidates, _) in &retrieval_sets {
        add(query);
        for c in *candidates {
            add(c);
        }
    }
    for group in &crosslingual_groups {
        for s in *group {
            add(s);
        }
    }
    for (a, b, _) in &length_pairs {
        add(a);
        add(b);
    }
    for (orig, variants) in &robustness_pairs {
        add(orig);
        for v in *variants {
            add(v);
        }
    }
    for (_, texts) in &cluster_groups {
        for t in *texts {
            add(t);
        }
    }

    println!("corpus: {} unique texts (tests 1-7)", all_texts.len());
    println!(
        "  {} similarity pairs, {} retrieval queries, {} crosslingual groups",
        similarity_pairs.len(),
        retrieval_sets.len(),
        crosslingual_groups.len()
    );
    println!(
        "  {} length pairs, {} robustness sets, {} clusters\n",
        length_pairs.len(),
        robustness_pairs.len(),
        cluster_groups.len()
    );

    // --- run each provider ---
    // local models: (model name passed to the client, label printed as the section header)
    let local_models = [
        (
            "all-MiniLM-L6-v2",
            "LOCAL: all-MiniLM-L6-v2 (6L, 23MB, 384-dim)",
        ),
        (
            "all-MiniLM-L12-v2",
            "LOCAL: all-MiniLM-L12-v2 (12L, 133MB, 384-dim)",
        ),
        (
            "bge-small-en-v1.5",
            "LOCAL: bge-small-en-v1.5 (12L, 133MB, 384-dim)",
        ),
        ("gte-small", "LOCAL: gte-small (12L, 67MB, 384-dim)"),
    ];

    for (model_name, label) in &local_models {
        println!("━━━ {label} ━━━");
        // the "local:" prefix is how run_provider tells local models from cloud providers
        if let Err(e) = run_provider(
            &format!("local:{model_name}"),
            None,
            &all_texts,
            &similarity_pairs,
            &retrieval_sets,
            &crosslingual_groups,
            &length_pairs,
            &robustness_pairs,
            &cluster_groups,
        )
        .await
        {
            println!("  error: {e}\n");
        }
    }

    // cloud providers: only those with an API key in the environment are benchmarked
    let cloud_providers: Vec<(&str, &str, Option<&str>)> = {
        let mut v = Vec::new();
        if let Some(k) = openai_key.as_deref() {
            v.push((
                "openai",
                "OPENAI: text-embedding-3-small (1536-dim)",
                Some(k),
            ));
        }
        if let Some(k) = gemini_key.as_deref() {
            v.push(("gemini", "GEMINI: gemini-embedding-001 (3072-dim)", Some(k)));
        }
        if let Some(k) = cohere_key.as_deref() {
            v.push(("cohere", "COHERE: embed-v4.0 (1024-dim)", Some(k)));
        }
        if let Some(k) = voyage_key.as_deref() {
            v.push(("voyage", "VOYAGE: voyage-3-large (1024-dim)", Some(k)));
        }
        v
    };

    for (name, label, key) in &cloud_providers {
        println!("━━━ {label} ━━━");
        if let Err(e) = run_provider(
            name,
            *key,
            &all_texts,
            &similarity_pairs,
            &retrieval_sets,
            &crosslingual_groups,
            &length_pairs,
            &robustness_pairs,
            &cluster_groups,
        )
        .await
        {
            println!("  error: {e}\n");
        }
    }

    if cloud_providers.is_empty() {
        println!("━━━ CLOUD: skipped (no API keys found) ━━━\n");
    }

    println!("━━━ COST SUMMARY ━━━");
    println!("  local:   $0 / unlimited (CPU inference)");
    println!("  openai:  $0.02 / 1M tokens (text-embedding-3-small)");
    println!("  gemini:  free tier 1500 RPM, then usage-based");
    println!("  cohere:  $0.10 / 1M tokens (embed-v4.0)");
    println!("  voyage:  $0.06 / 1M tokens (voyage-3-large)\n");

    Ok(())
}

/// Runs the whole benchmark (tests 1-8) against one provider and prints the results to stdout.
///
/// * `name` — `"local:<model>"` for local inference, otherwise one of `"openai"`, `"gemini"`,
///   `"cohere"` or `"voyage"`.
/// * `api_key` — required for cloud providers; not used for local models.
/// * `all_texts` — every text referenced by the test sets of tests 1-7; embedded in one batch.
/// * `similarity_pairs`, `length_pairs` — `(text_a, text_b, human_score)`.
/// * `retrieval_sets` — `(query, candidates, index_of_correct_candidate)`.
/// * `crosslingual_groups` — groups of texts that should embed close to each other.
/// * `robustness_pairs` — `(original, surface_variants)`.
/// * `cluster_groups` — `(topic_label, texts)`.
///
/// # Errors
/// Propagates any error from building the local client or from the embedding calls.
///
/// # Panics
/// Panics if `api_key` is `None` for a cloud provider, if `name` is not a known provider, if
/// `all_texts` is empty, or if a test set references a text that is missing from `all_texts`.
// one parameter per test set keeps the call sites explicit, hence the lint override
#[allow(clippy::too_many_arguments)]
async fn run_provider(
    name: &str,
    api_key: Option<&str>,
    all_texts: &[String],
    similarity_pairs: &[(&str, &str, f32)],
    retrieval_sets: &[(&str, &[&str], usize)],
    crosslingual_groups: &[&[&str]],
    length_pairs: &[(&str, &str, f32)],
    robustness_pairs: &[(&str, &[&str])],
    cluster_groups: &[(&str, &[&str])],
) -> embedrs::Result<()> {
    let client = if let Some(model_name) = name.strip_prefix("local:") {
        embedrs::Client::local(model_name)?
    } else {
        match name {
            "openai" => embedrs::Client::openai(api_key.unwrap()),
            "gemini" => embedrs::Client::gemini(api_key.unwrap()),
            "cohere" => embedrs::Client::cohere(api_key.unwrap()),
            "voyage" => embedrs::Client::voyage(api_key.unwrap()),
            _ => unreachable!(),
        }
    };

    // warmup for local (includes model download + load), so it is not billed to the latency below
    if name.starts_with("local:") {
        let t = Instant::now();
        let _ = client.embed(vec!["warmup".into()]).await?;
        println!("  model load: {}ms", t.elapsed().as_millis());
    }

    // embed all texts
    let start = Instant::now();
    let result = client.embed_batch(all_texts.to_vec()).await?;
    let elapsed = start.elapsed();
    let ms = elapsed.as_secs_f64() * 1000.0;

    println!("  dimensions: {}", result.embeddings[0].len());
    println!(
        "  latency: {ms:.0}ms total, {:.1}ms/text ({} texts)",
        ms / all_texts.len() as f64,
        all_texts.len()
    );
    if result.usage.total_tokens > 0 {
        println!("  tokens: {}", result.usage.total_tokens);
    }

    // text → position in `all_texts` (and therefore in `result.embeddings`), built once so the
    // nested loops below do not rescan the corpus; the first occurrence wins on duplicates
    let mut index: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::with_capacity(all_texts.len());
    for (i, t) in all_texts.iter().enumerate() {
        index.entry(t.as_str()).or_insert(i);
    }

    let lookup = |text: &str| -> &Vec<f32> {
        let idx = *index.get(text).unwrap_or_else(|| {
            // truncate by chars, not bytes: a byte slice can split a multi-byte character
            panic!(
                "text not found in corpus: {}",
                text.chars().take(50).collect::<String>()
            )
        });
        &result.embeddings[idx]
    };

    // ── test 1: graded similarity ──
    println!("\n  [1] GRADED SIMILARITY (spearman ρ)");
    let mut model_scores: Vec<f64> = Vec::new();
    let mut human_scores: Vec<f64> = Vec::new();
    let mut similar_cosines: Vec<f64> = Vec::new();
    let mut dissimilar_cosines: Vec<f64> = Vec::new();

    for (a, b, human_score) in similarity_pairs {
        let cos = embedrs::cosine_similarity(lookup(a), lookup(b));
        model_scores.push(cos as f64);
        human_scores.push(*human_score as f64);
        // pairs scored between 1.5 and 3.5 count toward ρ only, not toward the gap
        if *human_score >= 3.5 {
            similar_cosines.push(cos as f64);
        } else if *human_score <= 1.5 {
            dissimilar_cosines.push(cos as f64);
        }
    }

    let rho = spearman_correlation(&human_scores, &model_scores);
    let sim_avg = mean(&similar_cosines);
    let dis_avg = mean(&dissimilar_cosines);
    let gap = sim_avg - dis_avg;
    println!("      ρ = {rho:.4}");
    println!(
        "      similar avg cosine   = {sim_avg:.4} ({} pairs)",
        similar_cosines.len()
    );
    println!(
        "      dissimilar avg cosine = {dis_avg:.4} ({} pairs)",
        dissimilar_cosines.len()
    );
    println!("      discrimination gap    = {gap:.4}");

    // ── test 2: retrieval accuracy ──
    println!("\n  [2] RETRIEVAL ACCURACY");
    let mut top1_hits = 0;
    let mut top3_hits = 0;
    let mut mrr_sum = 0.0;
    let total = retrieval_sets.len();

    for (query, candidates, correct_idx) in retrieval_sets {
        let q_emb = lookup(query);
        let mut scored: Vec<(usize, f32)> = candidates
            .iter()
            .enumerate()
            .map(|(i, c)| (i, embedrs::cosine_similarity(q_emb, lookup(c))))
            .collect();
        // descending by similarity; total_cmp gives a total order, so a NaN cannot panic the sort
        scored.sort_by(|a, b| b.1.total_cmp(&a.1));

        // 1-based rank of the correct candidate
        let rank = scored.iter().position(|(i, _)| *i == *correct_idx).unwrap() + 1;
        mrr_sum += 1.0 / rank as f64;

        if rank == 1 {
            top1_hits += 1;
        }
        if rank <= 3 {
            top3_hits += 1;
        }
    }

    let mrr = mrr_sum / total as f64;
    println!(
        "      top-1:  {top1_hits}/{total} ({:.0}%)",
        pct(top1_hits, total)
    );
    println!(
        "      top-3:  {top3_hits}/{total} ({:.0}%)",
        pct(top3_hits, total)
    );
    println!("      MRR:    {mrr:.4}");

    // ── test 3: multilingual breakdown ──
    println!("\n  [3] MULTILINGUAL BREAKDOWN");

    // Han ideographs appear in both Chinese and Japanese text, while kana (hiragana and
    // katakana) appears only in Japanese. A pair is therefore Japanese when its first text has
    // kana, and Chinese only when it has Han ideographs and no kana.
    fn has_kana(text: &str) -> bool {
        text.chars().any(|c| ('\u{3040}'..='\u{30ff}').contains(&c))
    }
    fn has_han(text: &str) -> bool {
        text.chars().any(|c| ('\u{4e00}'..='\u{9fff}').contains(&c))
    }

    // language is decided from the first text of each pair
    let en_pairs: Vec<_> = similarity_pairs
        .iter()
        .filter(|(a, _, _)| a.is_ascii())
        .collect();
    let zh_pairs: Vec<_> = similarity_pairs
        .iter()
        .filter(|(a, _, _)| has_han(a) && !has_kana(a))
        .collect();
    let ja_pairs: Vec<_> = similarity_pairs
        .iter()
        .filter(|(a, _, _)| has_kana(a))
        .collect();

    for (label, pairs) in [
        ("english", &en_pairs),
        ("chinese", &zh_pairs),
        ("japanese", &ja_pairs),
    ] {
        if pairs.is_empty() {
            continue;
        }
        let h: Vec<f64> = pairs.iter().map(|(_, _, s)| *s as f64).collect();
        let m: Vec<f64> = pairs
            .iter()
            .map(|(a, b, _)| embedrs::cosine_similarity(lookup(a), lookup(b)) as f64)
            .collect();
        let r = spearman_correlation(&h, &m);
        println!("      {label:8} ρ = {r:.4} ({} pairs)", pairs.len());
    }

    // ── test 4: cross-lingual alignment ──
    println!("\n  [4] CROSS-LINGUAL ALIGNMENT");
    let mut cross_sims: Vec<f64> = Vec::new();
    for group in crosslingual_groups {
        // all pairs within group should be highly similar
        for i in 0..group.len() {
            for j in (i + 1)..group.len() {
                let cos = embedrs::cosine_similarity(lookup(group[i]), lookup(group[j]));
                cross_sims.push(cos as f64);
            }
        }
    }
    let cross_avg = mean(&cross_sims);
    let cross_min = cross_sims.iter().cloned().fold(f64::INFINITY, f64::min);
    let cross_max = cross_sims.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    println!(
        "      avg cosine = {cross_avg:.4} (min={cross_min:.4}, max={cross_max:.4}, {} pairs)",
        cross_sims.len()
    );
    println!("      ideal: avg → 1.0 (same meaning across en/zh/ja)");

    // ── test 5: text length sensitivity ──
    println!("\n  [5] TEXT LENGTH SENSITIVITY");
    // buckets are keyed on the byte length of the first text of each pair
    let short_pairs: Vec<_> = length_pairs
        .iter()
        .filter(|(a, _, _)| a.len() < 50)
        .collect();
    let medium_pairs: Vec<_> = length_pairs
        .iter()
        .filter(|(a, _, _)| a.len() >= 50 && a.len() < 200)
        .collect();
    let long_pairs: Vec<_> = length_pairs
        .iter()
        .filter(|(a, _, _)| a.len() >= 200)
        .collect();

    for (label, pairs) in [
        ("short", &short_pairs),
        ("medium", &medium_pairs),
        ("long", &long_pairs),
    ] {
        if pairs.is_empty() {
            continue;
        }
        let h: Vec<f64> = pairs.iter().map(|(_, _, s)| *s as f64).collect();
        let m: Vec<f64> = pairs
            .iter()
            .map(|(a, b, _)| embedrs::cosine_similarity(lookup(a), lookup(b)) as f64)
            .collect();
        let r = if h.len() >= 3 {
            spearman_correlation(&h, &m)
        } else {
            // too few pairs for meaningful spearman, use pearson
            pearson_correlation(&h, &m)
        };
        let avg_cos = mean(&m);
        println!(
            "      {label:6} ρ = {r:.4}, avg cosine = {avg_cos:.4} ({} pairs)",
            pairs.len()
        );
    }

    // ── test 6: robustness ──
    println!("\n  [6] ROBUSTNESS (typos, casing, word order)");
    let mut robust_scores: Vec<f64> = Vec::new();
    for (original, variants) in robustness_pairs {
        let orig_emb = lookup(original);
        for variant in *variants {
            let cos = embedrs::cosine_similarity(orig_emb, lookup(variant));
            robust_scores.push(cos as f64);
        }
    }
    let robust_avg = mean(&robust_scores);
    let robust_min = robust_scores.iter().cloned().fold(f64::INFINITY, f64::min);
    println!(
        "      avg cosine = {robust_avg:.4} (min={robust_min:.4}, {} variants)",
        robust_scores.len()
    );
    println!("      ideal: avg → 1.0 (meaning unchanged despite surface variation)");

    // ── test 7: clustering quality ──
    println!("\n  [7] CLUSTERING QUALITY");
    // for each text, find its nearest neighbor; if same cluster → hit
    let mut all_cluster_texts: Vec<(&str, &str)> = Vec::new(); // (text, label)
    for (label, texts) in cluster_groups {
        for t in *texts {
            all_cluster_texts.push((t, label));
        }
    }

    let mut nn_hits = 0;
    let total_cluster = all_cluster_texts.len();
    for i in 0..total_cluster {
        let (text_i, label_i) = all_cluster_texts[i];
        let emb_i = lookup(text_i);
        let mut best_sim = f64::NEG_INFINITY;
        let mut best_label = "";
        for j in 0..total_cluster {
            // a text is trivially its own nearest neighbor, so skip it
            if i == j {
                continue;
            }
            let (text_j, label_j) = all_cluster_texts[j];
            let cos = embedrs::cosine_similarity(emb_i, lookup(text_j)) as f64;
            if cos > best_sim {
                best_sim = cos;
                best_label = label_j;
            }
        }
        if best_label == label_i {
            nn_hits += 1;
        }
    }
    let nn_purity = pct(nn_hits, total_cluster);
    println!("      nearest-neighbor purity = {nn_hits}/{total_cluster} ({nn_purity:.0}%)");

    // also compute inter/intra cluster ratio
    let mut intra_sims: Vec<f64> = Vec::new();
    let mut inter_sims: Vec<f64> = Vec::new();
    for i in 0..total_cluster {
        for j in (i + 1)..total_cluster {
            let cos = embedrs::cosine_similarity(
                lookup(all_cluster_texts[i].0),
                lookup(all_cluster_texts[j].0),
            ) as f64;
            if all_cluster_texts[i].1 == all_cluster_texts[j].1 {
                intra_sims.push(cos);
            } else {
                inter_sims.push(cos);
            }
        }
    }
    let intra_avg = mean(&intra_sims);
    let inter_avg = mean(&inter_sims);
    println!("      intra-cluster avg = {intra_avg:.4} (same topic)");
    println!("      inter-cluster avg = {inter_avg:.4} (different topics)");
    // the denominator is floored at 0.001 so a near-zero or negative inter-cluster average
    // cannot blow up or flip the sign of the ratio
    println!(
        "      separation ratio  = {:.2}x",
        intra_avg / inter_avg.max(0.001)
    );

    // ── test 8: throughput ──
    println!("\n  [8] THROUGHPUT (500 texts)");
    let throughput_texts: Vec<String> = (0..500)
        .map(|i| {
            format!(
                "This is benchmark sentence number {} for measuring embedding throughput performance across different providers and models",
                i
            )
        })
        .collect();

    let t = Instant::now();
    // the batch is not needed afterwards, so it is moved into the call instead of cloned
    let tp_result = client.embed_batch(throughput_texts).await?;
    let tp_elapsed = t.elapsed();
    let tp_ms = tp_elapsed.as_secs_f64() * 1000.0;
    let texts_per_sec = 500.0 / tp_elapsed.as_secs_f64();
    println!("      500 texts in {tp_ms:.0}ms ({texts_per_sec:.0} texts/sec)");
    println!("      {:.1}ms/text", tp_ms / 500.0);
    if tp_result.usage.total_tokens > 0 {
        println!("      tokens: {}", tp_result.usage.total_tokens);
    }

    println!();
    Ok(())
}

// ═══════════════════════════════════════════
//  test data
// ═══════════════════════════════════════════

/// Sentence pairs with a human similarity judgement, used by the graded-similarity and
/// multilingual tests.
///
/// Each entry is `(text_a, text_b, human_score)` with the score on a 0.0-5.0 scale. Entries
/// are returned section by section: English high, medium and low similarity, then Chinese,
/// then Japanese.
fn graded_similarity_pairs() -> Vec<(&'static str, &'static str, f32)> {
    // english: high similarity (4.0-5.0)
    const ENGLISH_HIGH: &[(&str, &str, f32)] = &[
        (
            "A dog is running through the grass",
            "A dog runs across a green field",
            4.8,
        ),
        (
            "The stock market crashed today",
            "Financial markets experienced a sharp decline",
            4.5,
        ),
        (
            "She plays the piano beautifully",
            "She is a talented pianist",
            4.3,
        ),
        (
            "The children are playing in the park",
            "Kids are having fun at the playground",
            4.2,
        ),
        (
            "He is cooking dinner in the kitchen",
            "A man prepares a meal at home",
            4.0,
        ),
        (
            "Scientists discovered a new species of fish",
            "Researchers found a previously unknown fish species",
            4.7,
        ),
        (
            "The movie received excellent reviews",
            "Critics praised the film highly",
            4.4,
        ),
        (
            "She graduated from university last year",
            "She completed her college degree recently",
            4.1,
        ),
    ];

    // english: medium similarity (2.0-3.5)
    const ENGLISH_MEDIUM: &[(&str, &str, f32)] = &[
        (
            "The cat sat on the windowsill",
            "A bird was perched on the fence",
            2.5,
        ),
        (
            "He drives to work every morning",
            "She takes the bus to school",
            2.8,
        ),
        (
            "The restaurant serves Italian food",
            "The cafe has a French menu",
            3.0,
        ),
        (
            "I enjoy reading science fiction novels",
            "She likes watching fantasy movies",
            2.7,
        ),
        (
            "The temperature dropped below freezing",
            "It was a very cold winter day",
            3.5,
        ),
        (
            "He is a software engineer at Google",
            "She works as a data scientist at Meta",
            3.0,
        ),
    ];

    // english: low similarity (0.0-1.5)
    const ENGLISH_LOW: &[(&str, &str, f32)] = &[
        (
            "The sun rises in the east",
            "Quantum entanglement violates local realism",
            0.2,
        ),
        (
            "She ordered a cappuccino at Starbucks",
            "The Pythagorean theorem relates triangle sides",
            0.1,
        ),
        (
            "The train arrived at platform 3",
            "Photosynthesis converts light into energy",
            0.3,
        ),
        (
            "He adopted a golden retriever puppy",
            "The GDP of Japan decreased in Q3",
            0.2,
        ),
        (
            "The concert tickets sold out quickly",
            "Mitochondria are the powerhouse of the cell",
            0.1,
        ),
        (
            "We went hiking in the mountains",
            "TCP/IP is the backbone of the internet",
            0.3,
        ),
        (
            "The baby started crawling last week",
            "The Fourier transform decomposes signals",
            0.1,
        ),
    ];

    // chinese
    const CHINESE: &[(&str, &str, f32)] = &[
        (
            "今天天气真好,适合出去走走",
            "天气晴朗,是个散步的好日子",
            4.5,
        ),
        (
            "机器学习需要大量的训练数据",
            "人工智能系统依赖海量数据集进行学习",
            4.3,
        ),
        ("他在大学学习计算机科学", "她是一名软件工程专业的学生", 3.2),
        ("这家餐厅的菜很好吃", "今天的股票市场表现不佳", 0.3),
        ("春天来了,花都开了", "量子计算机使用量子比特", 0.1),
        ("我喜欢在周末看电影", "她每天早上跑步锻炼身体", 1.5),
    ];

    // japanese
    const JAPANESE: &[(&str, &str, f32)] = &[
        ("東京は日本の首都です", "日本の首都は東京である", 5.0),
        (
            "彼は毎日電車で通勤しています",
            "彼女はバスで学校に通っています",
            2.8,
        ),
        (
            "桜の季節は本当に美しい",
            "プログラミング言語の構文解析",
            0.2,
        ),
        (
            "このレストランのラーメンは最高だ",
            "あのカフェのコーヒーは美味しい",
            2.5,
        ),
        (
            "人工知能の研究が進んでいる",
            "AI技術は急速に発展している",
            4.6,
        ),
    ];

    // concatenated in declaration order, so indices match the flat list
    [ENGLISH_HIGH, ENGLISH_MEDIUM, ENGLISH_LOW, CHINESE, JAPANESE].concat()
}

/// Query→document retrieval fixtures.
///
/// Each entry is `(query, candidates, correct_idx)`, where `correct_idx` is the position in
/// `candidates` of the document that answers `query`.
fn retrieval_test_sets() -> Vec<(&'static str, &'static [&'static str], usize)> {
    const SETS: &[(&str, &[&str], usize)] = &[
        (
            "How to sort an array in Python?",
            &[
                "Python's sorted() function returns a new sorted list from an iterable",
                "JavaScript uses Array.prototype.sort() for sorting",
                "The quick brown fox jumps over the lazy dog",
                "Machine learning algorithms can classify images",
            ],
            0,
        ),
        (
            "What causes climate change?",
            &[
                "The recipe calls for two cups of flour",
                "Greenhouse gas emissions from burning fossil fuels trap heat in the atmosphere",
                "The stock market saw gains in the tech sector",
                "Regular exercise improves cardiovascular health",
            ],
            1,
        ),
        (
            "Best practices for database indexing",
            &[
                "Yoga and meditation can reduce stress levels",
                "The history of ancient Rome spans over a thousand years",
                "Create indexes on columns frequently used in WHERE clauses and JOIN conditions",
                "Modern art galleries feature contemporary paintings",
            ],
            2,
        ),
        (
            "How does photosynthesis work?",
            &[
                "Investment portfolios should be diversified across asset classes",
                "The Eiffel Tower was completed in 1889",
                "Blockchain technology uses distributed ledgers for transactions",
                "Plants convert sunlight, water, and CO2 into glucose and oxygen using chlorophyll",
            ],
            3,
        ),
        (
            "Symptoms of vitamin D deficiency",
            &[
                "Low vitamin D levels can cause fatigue, bone pain, muscle weakness, and mood changes",
                "The latest smartphone features a triple camera system",
                "Renaissance art flourished in 15th century Italy",
                "Rust's ownership system prevents memory safety bugs at compile time",
            ],
            0,
        ),
        (
            "如何学习一门新的编程语言?",
            &[
                "通过阅读官方文档、做练习项目和参与开源社区来学习编程语言",
                "今天的天气预报说下午会下雨",
                "这部电影获得了奥斯卡最佳影片奖",
                "健康饮食应该包含足够的蔬菜和水果",
            ],
            0,
        ),
        (
            "为什么要使用版本控制系统?",
            &[
                "瑜伽有助于放松身心,减轻压力",
                "中国的长城是世界七大奇迹之一",
                "Git等版本控制系统可以追踪代码变更、支持多人协作、方便回滚",
                "今年的樱花比往年开得早",
            ],
            2,
        ),
        (
            "Rustプログラミング言語の特徴は?",
            &[
                "今日のランチはカレーライスにしよう",
                "Rustは所有権システムによりメモリ安全性をコンパイル時に保証する言語です",
                "東京オリンピックは2021年に開催された",
                "この映画の評価はとても高い",
            ],
            1,
        ),
        (
            "健康的な食生活とは?",
            &[
                "バランスの取れた栄養摂取と規則正しい食事時間が健康的な食生活の基本です",
                "新幹線は時速300キロで走行する",
                "プログラミングの基礎を学ぶにはPythonがおすすめ",
                "来週の会議は水曜日に変更になった",
            ],
            0,
        ),
    ];

    SETS.to_vec()
}

/// Cross-lingual fixtures: each group holds one sentence rendered in English, Chinese and
/// Japanese (in that order), so the embeddings within a group are expected to be close.
fn crosslingual_groups() -> Vec<&'static [&'static str]> {
    const GROUPS: &[&[&str]] = &[
        &[
            "Artificial intelligence is transforming the world",
            "人工智能正在改变世界",
            "人工知能は世界を変えている",
        ],
        &[
            "The weather is beautiful today",
            "今天天气很好",
            "今日は天気がいい",
        ],
        &[
            "I like to read books in my free time",
            "我喜欢在空闲时间读书",
            "暇な時に本を読むのが好きです",
        ],
        &[
            "Programming is a valuable skill to learn",
            "编程是一项值得学习的技能",
            "プログラミングは学ぶ価値のあるスキルだ",
        ],
        &[
            "Climate change is a global challenge",
            "气候变化是一个全球性挑战",
            "気候変動は地球規模の課題である",
        ],
        &[
            "Music can improve your mood",
            "音乐可以改善你的心情",
            "音楽は気分を良くしてくれる",
        ],
    ];

    GROUPS.to_vec()
}

/// Graded pairs bucketed by the length of the first text: short (<50 chars), medium (50-200)
/// and long (200+).
///
/// Each entry is `(text_a, text_b, human_score)`; the buckets are returned in the order
/// short, medium, long.
fn length_sensitivity_pairs() -> Vec<(&'static str, &'static str, f32)> {
    // short pairs (<50 chars)
    const SHORT: &[(&str, &str, f32)] = &[
        ("dogs are loyal", "dogs are faithful animals", 4.5),
        ("it is raining", "the weather is wet", 3.8),
        ("she smiled", "he frowned", 1.5),
        ("fast car", "quick automobile", 4.7),
    ];

    // medium pairs (50-200 chars)
    const MEDIUM: &[(&str, &str, f32)] = &[
        (
            "Machine learning algorithms can identify patterns in large datasets",
            "Deep learning models are trained to recognize complex patterns in big data",
            4.2,
        ),
        (
            "The new restaurant downtown has an excellent selection of seafood dishes",
            "There is a great seafood place that recently opened in the city center",
            4.0,
        ),
        (
            "Regular physical exercise helps maintain cardiovascular health and reduces stress",
            "Consistent workout routines contribute to heart health and lower anxiety levels",
            4.3,
        ),
    ];

    // long pairs (200+ chars)
    const LONG: &[(&str, &str, f32)] = &[
        (
            "The rapid advancement of artificial intelligence and machine learning technologies has fundamentally transformed how businesses operate, enabling automated decision-making, predictive analytics, and personalized customer experiences at scale across various industries worldwide",
            "AI and ML breakthroughs are reshaping the corporate landscape by automating decisions, forecasting trends, and delivering tailored user experiences to millions of customers in diverse sectors globally",
            4.5,
        ),
        (
            "Sustainable development requires balancing economic growth with environmental protection and social equity, ensuring that the needs of the present generation are met without compromising the ability of future generations to meet their own needs, as outlined in the Brundtland Report",
            "The concept of sustainability involves finding equilibrium between economic progress, ecological conservation, and social fairness so that current prosperity does not come at the expense of opportunities for those who will come after us",
            4.6,
        ),
        (
            "Quantum computing leverages the principles of quantum mechanics, including superposition and entanglement, to perform certain computations exponentially faster than classical computers, with potential applications in cryptography, drug discovery, and optimization problems",
            "The Renaissance was a cultural movement that began in Italy in the 14th century, characterized by renewed interest in classical Greek and Roman art, literature, and philosophy, which eventually spread throughout Europe and laid the groundwork for the modern age",
            0.3,
        ),
    ];

    [SHORT, MEDIUM, LONG].concat()
}

/// robustness fixtures: each entry is `(original, variations)`
///
/// every variation list holds five rewrites of its original, in this order:
/// all uppercase, all lowercase, typos, reordered / rephrased wording,
/// doubled whitespace with trailing punctuation
fn robustness_pairs() -> Vec<(&'static str, &'static [&'static str])> {
    const FOX_VARIANTS: &[&str] = &[
        // all uppercase
        "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG",
        // all lowercase
        "the quick brown fox jumps over the lazy dog",
        // typos
        "The quikc brown fox jumsp over the layz dog",
        // word order change (passive)
        "The lazy dog was jumped over by the quick brown fox",
        // extra whitespace / punctuation
        "The  quick  brown  fox  jumps  over  the  lazy  dog.",
    ];
    const ML_VARIANTS: &[&str] = &[
        "MACHINE LEARNING IS A SUBSET OF ARTIFICIAL INTELLIGENCE",
        "machine learning is a subset of artificial intelligence",
        "Machin lerning is a subst of artifical inteligence",
        "Artificial intelligence includes machine learning as a subset",
        "Machine  learning  is  a  subset  of  artificial  intelligence!",
    ];
    const APPLES_VARIANTS: &[&str] = &[
        "SHE BOUGHT THREE RED APPLES FROM THE GROCERY STORE",
        "she bought three red apples from the grocery store",
        "She bougt three red aples from the grocey store",
        "From the grocery store, three red apples were bought by her",
        "She bought  three  red  apples  from the  grocery  store.",
    ];

    vec![
        ("The quick brown fox jumps over the lazy dog", FOX_VARIANTS),
        (
            "Machine learning is a subset of artificial intelligence",
            ML_VARIANTS,
        ),
        (
            "She bought three red apples from the grocery store",
            APPLES_VARIANTS,
        ),
    ]
}

/// cluster groups: `(topic label, texts)` fixtures for the clustering quality test
///
/// four topics with five texts each; the table lives in a constant and is
/// copied into a fresh `Vec` on every call
fn cluster_groups() -> Vec<(&'static str, &'static [&'static str])> {
    const GROUPS: [(&str, &[&str]); 4] = [
        (
            "technology",
            &[
                "Python is a popular programming language for data science",
                "The new GPU delivers 50% better performance than its predecessor",
                "Kubernetes orchestrates containerized applications at scale",
                "Version control with Git is essential for software development",
                "Cloud computing enables on-demand access to computing resources",
            ],
        ),
        (
            "cooking",
            &[
                "Preheat the oven to 350 degrees before baking the cake",
                "Fresh herbs add depth of flavor to any dish",
                "The secret to a good risotto is constant stirring and warm broth",
                "Sear the steak on high heat for a perfect crust",
                "Homemade pasta requires only flour, eggs, and a bit of salt",
            ],
        ),
        (
            "sports",
            &[
                "The marathon runner completed the race in under three hours",
                "Basketball requires both individual skill and team coordination",
                "Swimming is an excellent low-impact cardiovascular exercise",
                "The tennis match went to five sets before the champion prevailed",
                "Training for a triathlon involves swimming, cycling, and running",
            ],
        ),
        (
            "finance",
            &[
                "Diversifying your investment portfolio reduces overall risk",
                "The Federal Reserve raised interest rates by 25 basis points",
                "Compound interest is the most powerful force in wealth building",
                "ETFs offer broad market exposure with lower fees than mutual funds",
                "A strong balance sheet indicates financial stability and low debt",
            ],
        ),
    ];

    GROUPS.to_vec()
}

// ═══════════════════════════════════════════
//  math utilities
// ═══════════════════════════════════════════

/// arithmetic mean of `v`; an empty slice yields `0.0` instead of NaN
fn mean(v: &[f64]) -> f64 {
    match v.len() {
        0 => 0.0,
        count => v.iter().sum::<f64>() / count as f64,
    }
}

/// `num / den` expressed as a percentage
///
/// a zero denominator yields `0.0` rather than NaN / infinity, matching the
/// empty-input convention used by `mean`
fn pct(num: usize, den: usize) -> f64 {
    if den == 0 {
        return 0.0;
    }
    num as f64 / den as f64 * 100.0
}

/// spearman rank correlation between `x` and `y`
///
/// both inputs are converted to ranks with `ranks` (tied values share an
/// averaged rank) and the Pearson correlation of the two rank vectors is
/// returned. The shortcut `1 - 6Σd² / (n(n² - 1))` is only exact when all
/// ranks are distinct, so it is not used here.
///
/// returns `0.0` when the coefficient is undefined: fewer than two samples,
/// or one side has no rank variance (all values tied).
///
/// # Panics
/// panics if `x` and `y` differ in length
fn spearman_correlation(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(x.len(), y.len());
    let rank_x = ranks(x);
    let rank_y = ranks(y);
    if rank_x.len() < 2 {
        // a single point (or none) carries no ordering information
        return 0.0;
    }
    let n = rank_x.len() as f64;
    let mean_x = rank_x.iter().sum::<f64>() / n;
    let mean_y = rank_y.iter().sum::<f64>() / n;

    let mut cov = 0.0_f64;
    let mut var_x = 0.0_f64;
    let mut var_y = 0.0_f64;
    for (rx, ry) in rank_x.iter().zip(rank_y.iter()) {
        let dx = rx - mean_x;
        let dy = ry - mean_y;
        cov += dx * dy;
        var_x += dx * dx;
        var_y += dy * dy;
    }

    let den = (var_x * var_y).sqrt();
    if den < 1e-12 {
        // every value on one side is tied: correlation is undefined
        return 0.0;
    }
    cov / den
}

/// pearson correlation coefficient between `x` and `y`
///
/// returns `0.0` when the denominator is (near) zero, i.e. when either input
/// has no variance or the inputs are empty.
///
/// # Panics
/// panics if `x` and `y` differ in length; pairing a prefix of one input
/// against a mean taken over the whole of the other would give a meaningless
/// coefficient
fn pearson_correlation(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(x.len(), y.len());
    let mean_x = mean(x);
    let mean_y = mean(y);
    let mut num = 0.0;
    let mut den_x = 0.0;
    let mut den_y = 0.0;
    for (xi, yi) in x.iter().zip(y.iter()) {
        let dx = xi - mean_x;
        let dy = yi - mean_y;
        num += dx * dy;
        den_x += dx * dx;
        den_y += dy * dy;
    }
    let den = (den_x * den_y).sqrt();
    if den < 1e-12 {
        // constant (or empty) input: correlation is undefined
        return 0.0;
    }
    num / den
}

/// 1-based ranks of `values` in ascending order
///
/// values whose sorted neighbours differ by less than `1e-12` form a tie
/// group, and every member of a group receives the average of the ranks the
/// group spans. The result is indexed like the input.
///
/// # Panics
/// panics if two values cannot be ordered during sorting (NaN)
fn ranks(values: &[f64]) -> Vec<f64> {
    let len = values.len();
    // positions of `values`, stably ordered by ascending value
    let mut order: Vec<usize> = (0..len).collect();
    order.sort_by(|&a, &b| values[a].partial_cmp(&values[b]).unwrap());

    let mut out = vec![0.0; len];
    let mut start = 0;
    while start < len {
        // grow [start, end] while the next sorted value ties with the current one
        let mut end = start;
        while end + 1 < len && (values[order[end + 1]] - values[order[end]]).abs() < 1e-12 {
            end += 1;
        }
        // average of the 1-based ranks start+1 ..= end+1
        let shared_rank = (start + end) as f64 / 2.0 + 1.0;
        for &pos in &order[start..=end] {
            out[pos] = shared_rank;
        }
        start = end + 1;
    }
    out
}