use std::sync::Arc;
use arrow::util::pretty::pretty_format_batches;
use arrow_array::{Array, FixedSizeListArray, Float32Array, LargeStringArray, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use infino::{BoolMode, IndexSpec, Metric, VectorFilter, VectorSearchOptions, connect};
/// Length of every embedding vector: sizes the `emb` column's fixed-size list type,
/// is passed to the vector index declaration, and is the length of each demo embedding.
const EMB_DIM: usize = 16;
/// Third argument to `IndexSpec::vector` in `main`. The name suggests a centroid count
/// for the vector index — NOTE(review): confirm against the infino API.
const DEMO_N_CENT: usize = 1;
/// Passed to both `bm25_search` and `vector_search` in `main`; presumably the maximum
/// number of hits each search returns — TODO confirm.
const SEARCH_TOP_K: usize = 10;
/// Demo entry point: creates a two-row `docs` table and runs four queries against it
/// (BM25, vector kNN, filtered vector kNN, SQL), printing each result under a heading.
///
/// # Errors
///
/// Returns the first error raised while connecting, creating the table, building or
/// appending the batch, or executing any of the queries.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let db = connect("memory://")?;

    // Two non-nullable columns: the indexed text and its embedding.
    let columns = vec![
        Field::new("title", DataType::LargeUtf8, false),
        Field::new("emb", vector_field(EMB_DIM), false),
    ];
    let schema = Arc::new(Schema::new(columns));

    // The table shares the schema handle; the original handle is handed to `build_batch`.
    let table = db.create_table(
        "docs",
        Arc::clone(&schema),
        IndexSpec::new()
            .fts("title")
            .vector("emb", EMB_DIM, DEMO_N_CENT, Metric::Cosine),
    )?;

    let titles = ["the quick brown fox", "a lazy sleeping dog"];
    let batch = build_batch(schema, &titles)?;
    table.append(&batch)?;

    println!("== A. BM25 full-text search for \"fox\" ==");
    let text_hits = table.bm25_search(
        "title",
        "fox",
        SEARCH_TOP_K,
        BoolMode::Or,
        Some(&["_id", "title", "score"]),
    )?;
    print_batches(&text_hits);

    println!("== B. vector kNN (query == row 0's embedding) ==");
    // Same vector `build_batch` stored for row 0; reused by the filtered search below.
    let probe = unit_embedding(0);
    let nearest = table.vector_search(
        "emb",
        &probe,
        SEARCH_TOP_K,
        VectorSearchOptions::new(),
        None,
        Some(&["_id", "title", "score"]),
    )?;
    print_batches(&nearest);

    println!("== B2. filtered vector kNN (title matches \"dog\") ==");
    let dog_only = VectorFilter {
        column: "title",
        query: "dog",
        mode: BoolMode::Or,
    };
    let nearest_filtered = table.vector_search(
        "emb",
        &probe,
        SEARCH_TOP_K,
        VectorSearchOptions::new(),
        Some(dog_only),
        Some(&["_id", "title", "score"]),
    )?;
    print_batches(&nearest_filtered);

    println!("== C. SQL over the same table ==");
    let sql = "SELECT _id, title FROM docs ORDER BY _id";
    let sql_rows = db.query_sql(sql)?;
    print_batches(&sql_rows);

    Ok(())
}
/// Returns the Arrow data type used for the embedding column: a fixed-size list of
/// `dim` `Float32` values whose child field is named `"item"` and marked nullable.
///
/// `dim` is converted with a plain `as i32` cast, so no range check is performed.
fn vector_field(dim: usize) -> DataType {
    let element = Field::new("item", DataType::Float32, true);
    let list_len = dim as i32;
    DataType::FixedSizeList(Arc::new(element), list_len)
}
/// Produces a one-hot vector of length `EMB_DIM` for the given row.
///
/// The single `1.0` sits at index `row % EMB_DIM`, so row numbers at or beyond
/// `EMB_DIM` wrap around; every other component is `0.0`.
fn unit_embedding(row: usize) -> Vec<f32> {
    let hot = row % EMB_DIM;
    (0..EMB_DIM)
        .map(|slot| if slot == hot { 1.0 } else { 0.0 })
        .collect()
}
fn build_batch(
schema: Arc<Schema>,
titles: &[&str],
) -> Result<RecordBatch, Box<dyn std::error::Error>> {
let title_col = LargeStringArray::from(titles.to_vec());
let mut flat = Vec::with_capacity(titles.len() * EMB_DIM);
for row in 0..titles.len() {
flat.extend_from_slice(&unit_embedding(row));
}
let item_field = Arc::new(Field::new("item", DataType::Float32, true));
let emb_col = FixedSizeListArray::try_new(
item_field,
EMB_DIM as i32,
Arc::new(Float32Array::from(flat)) as Arc<dyn Array>,
None,
)?;
Ok(RecordBatch::try_new(
schema,
vec![Arc::new(title_col), Arc::new(emb_col)],
)?)
}
/// Prints `batches` to stdout as a formatted table, one space-prefixed line at a time,
/// followed by an empty line.
///
/// If formatting fails, a single space-prefixed failure message containing the error
/// is printed in place of the table; the function itself never returns an error.
fn print_batches(batches: &[RecordBatch]) {
    match pretty_format_batches(batches).map(|table| table.to_string()) {
        Ok(rendered) => rendered.lines().for_each(|row| println!(" {row}")),
        Err(e) => println!(" <failed to format batches: {e}>"),
    }
    println!();
}