Skip to main content

hybrid_retrieval/
hybrid_retrieval.rs

//! Phase 8d — hybrid retrieval worked example. Combines Phase 8b's
//! BM25 lexical index with Phase 7d's vector cosine similarity into a
//! single ORDER BY, showing where each shape wins and why fusing the
//! two beats either alone.
//!
//! Run with: `cargo run --example hybrid-retrieval`
//!
//! Corpus: 6 hand-written 1-sentence "tech blurbs", each with a
//! pre-baked 4-dim embedding `[systems, scripting, database, web]`.
//! Real RAG would call an embedding model; the math is identical.
//! Vectors are hand-set so each query's expected ranking is obvious
//! by inspection — no surprises from a neural net's latent space.
//!
//! See `README.md` for the narrative explanation.
15
16use sqlrite::{Connection, Result};
17
/// Demo corpus: one `(name, body, embedding)` tuple per document.
///
/// `name` is a short label, `body` is the sentence indexed for full-text
/// search, and `embedding` is a hand-set 4-dim vector whose axes are
/// `[systems, scripting, database, web]`.
const CORPUS: &[(&str, &str, [f32; 4])] = &[
    (
        "doc1",
        "rust is a systems programming language",
        [0.9, 0.0, 0.0, 0.0],
    ),
    (
        "doc2",
        "python is great for data science",
        [0.0, 0.9, 0.4, 0.0],
    ),
    (
        "doc3",
        "sqlite is an embedded database engine",
        [0.0, 0.0, 0.9, 0.0],
    ),
    (
        "doc4",
        "postgres is a powerful relational database server",
        [0.1, 0.0, 0.9, 0.5],
    ),
    (
        "doc5",
        "javascript runs in browsers and on servers",
        [0.0, 0.7, 0.0, 0.8],
    ),
    (
        "doc6",
        "redis caches data in memory for fast lookups",
        [0.0, 0.0, 0.6, 0.5],
    ),
];
51
52fn main() -> Result<()> {
53    let mut conn = Connection::open_in_memory()?;
54    conn.execute(
55        "CREATE TABLE docs (id INTEGER PRIMARY KEY, name TEXT, body TEXT, embedding VECTOR(4));",
56    )?;
57    for (name, body, vec) in CORPUS {
58        conn.execute(&format!(
59            "INSERT INTO docs (name, body, embedding) VALUES \
60             ('{name}', '{body}', [{}, {}, {}, {}]);",
61            vec[0], vec[1], vec[2], vec[3]
62        ))?;
63    }
64    conn.execute("CREATE INDEX docs_fts ON docs USING fts (body);")?;
65
66    // Same query, three rankings — see README for what each shape sees.
67    let body_query = "small embedded database";
68    let vector_query = [0.0, 0.0, 0.9, 0.2]; // semantic intent: "database, lightly web-ish"
69    let q_str = vec_lit(&vector_query);
70
71    println!("Corpus:");
72    for (name, body, vec) in CORPUS {
73        println!("  {name}: \"{body}\"  embedding={vec:?}");
74    }
75    println!("\nQuery body:   '{body_query}'");
76    println!("Query vector: {vector_query:?}\n");
77
78    println!("===  1. Pure BM25 (lexical) ===");
79    println!(
80        "WHERE  fts_match(body, '{body_query}')\n\
81         ORDER BY bm25_score(body, '{body_query}') DESC  LIMIT 3"
82    );
83    print_top(
84        &mut conn,
85        &format!(
86            "SELECT name, body FROM docs \
87             WHERE fts_match(body, '{body_query}') \
88             ORDER BY bm25_score(body, '{body_query}') DESC LIMIT 3;"
89        ),
90    )?;
91
92    println!("===  2. Pure vector (semantic) ===");
93    println!("ORDER BY vec_distance_cosine(embedding, {q_str}) ASC  LIMIT 3");
94    print_top(
95        &mut conn,
96        &format!(
97            "SELECT name, body FROM docs \
98             ORDER BY vec_distance_cosine(embedding, {q_str}) ASC LIMIT 3;"
99        ),
100    )?;
101
102    println!("===  3. Hybrid (50% BM25 + 50% inverted cosine) ===");
103    println!(
104        "WHERE  fts_match(body, '{body_query}')\n\
105         ORDER BY 0.5*bm25_score(...) + 0.5*(1.0 - vec_distance_cosine(...)) DESC  LIMIT 3"
106    );
107    print_top(
108        &mut conn,
109        &format!(
110            "SELECT name, body FROM docs \
111             WHERE fts_match(body, '{body_query}') \
112             ORDER BY 0.5 * bm25_score(body, '{body_query}') \
113                    + 0.5 * (1.0 - vec_distance_cosine(embedding, {q_str})) DESC \
114             LIMIT 3;"
115        ),
116    )?;
117    Ok(())
118}
119
/// Formats a slice of floats as a bracketed, comma-separated vector
/// literal (e.g. `[0, 0, 0.9, 0.2]`) for splicing into SQL text.
///
/// Each component uses `f32`'s `Display` output, so whole numbers print
/// without a decimal point (`0.0` becomes `0`). An empty slice yields `[]`.
fn vec_lit(v: &[f32]) -> String {
    let parts: Vec<String> = v.iter().map(f32::to_string).collect();
    format!("[{}]", parts.join(", "))
}
124
125fn print_top(conn: &mut Connection, sql: &str) -> Result<()> {
126    let stmt = conn.prepare(sql)?;
127    let mut rows = stmt.query()?;
128    let mut rank = 1;
129    while let Some(row) = rows.next()? {
130        let name: String = row.get_by_name("name")?;
131        let body: String = row.get_by_name("body")?;
132        println!("  {rank}. {name}  \"{body}\"");
133        rank += 1;
134    }
135    println!();
136    Ok(())
137}