# vicinity 0.2.0
#
# Approximate Nearest Neighbor Search: HNSW, DiskANN, IVF-PQ, ScaNN, quantization
# Documentation: https://docs.rs/vicinity
# Crate metadata. After `name` and `version`, keys are listed alphabetically
# with `description` last, following the usual Cargo.toml layout.
[package]
name = "vicinity"
version = "0.2.0"
authors = ["Arc <attobop@gmail.com>"]
categories = ["algorithms", "science"]
documentation = "https://docs.rs/vicinity"
edition = "2021"
keywords = ["ann", "hnsw", "vector-search", "similarity", "nearest-neighbor"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/arclabs561/vicinity"
rust-version = "1.80"
description = "Approximate Nearest Neighbor Search: HNSW, DiskANN, IVF-PQ, ScaNN, quantization"

[lib]
# Build a plain rlib, which is what ordinary Rust consumers link against.
# The PyO3 cdylib is not listed here: it is built by maturin, which sets
# PYO3_BUILD_EXTENSION_MODULE=1 for that build.
crate-type = ["rlib"]

[dependencies]
# General-purpose dependencies. `serde` and `serde_json` are optional and are
# switched on through the feature table.
rand = "0.9"
serde = { version = "1.0", features = ["derive"], optional = true }
serde_json = { version = "1.0", optional = true }
smallvec = "1.11"
thiserror = "2.0"

# Python bindings, feature-gated; used for ann-benchmarks integration.
# `pyo3/extension-module` must NOT be enabled here: it disables libpython
# linking for test binaries on macOS, which breaks `cargo test --all-features`.
# Maturin sets `PYO3_BUILD_EXTENSION_MODULE=1` itself when building wheels.
numpy = { version = "0.27.0", optional = true }
pyo3 = { version = "0.27.2", optional = true }

# Distance-kernel backends.
# innr: SIMD primitives (ecosystem crate).
innr = { version = "0.2.0", optional = true }
# simsimd: bindings to ashvardanian/SimSIMD, a high-performance SIMD library
# advertising up to 200x speedups for vector distance functions.
simsimd = { version = "6.5.12", optional = true }

# Linear algebra (SVD) for the OPQ Procrustes solver.
nalgebra = { version = "0.33", optional = true, default-features = false, features = ["std"] }

# Parallel batch search (feature-gated).
rayon = { version = "1.10", optional = true }

# Ecosystem crates.
# clump: hierarchical clustering (EVoC).
clump = { version = "0.5.2", optional = true }
# cnk: ID compression primitives.
cnk = { version = "0.1.3", optional = true }
# qntz: vector quantization primitives (extracted from `vicinity`).
qntz = { package = "qntz", version = "0.1.1", optional = true }
# rmt: spectral statistics (optional).
rmt = { version = "0.1.2", optional = true }
# sbits: succinct data structures.
sbits = { version = "0.1.1", optional = true }

# Persistence & IO. Every crate in this group is optional and is pulled in by
# the `persistence` feature.
bytemuck = { version = "1.14", optional = true, features = ["derive"] }
byteorder = { version = "1.5", optional = true }
crc32fast = { version = "1.4", optional = true }
durability = { version = "0.2.0", optional = true }
hex = { version = "0.4", optional = true }
libc = { version = "0.2", optional = true }
memmap2 = { version = "0.9", optional = true }
postcard = { version = "1.0", optional = true, features = ["use-crc", "alloc", "use-std"] }

# Legacy serialization, kept for backward compatibility.
bincode = { version = "1.3", optional = true }

# wasm32-unknown-unknown only: `rand` draws entropy through `getrandom`, which
# needs a JavaScript-backed RNG on that target.
# NOTE(review): getrandom 0.3's documentation also asks for
# `--cfg getrandom_backend="wasm_js"` in RUSTFLAGS for this backend; confirm
# that the wasm build configuration supplies it.
[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies.getrandom]
version = "0.3"
features = ["wasm_js"]

[features]
# Active unless the consumer sets `default-features = false`.
default = ["hnsw", "innr"]

# Algorithm and module toggles.
hnsw = []
vamana = ["hnsw"]
diskann = ["dep:serde_json"]
ivf_pq = ["dep:clump", "dep:nalgebra", "dep:serde", "dep:serde_json", "smallvec/serde"]
scann = ["dep:clump"]
sng = []
evoc = ["dep:clump"]
rmt-spectral = ["dep:rmt"]
experimental = []
benchmark = []

# Feature names referenced by cfgs in the extracted code.
# For now these are mostly organizational toggles with no dependencies.
nsw = []
kdtree = []
balltree = []
kmeans_tree = []
rptree = []

# Optional backends and integrations.
innr = ["dep:innr"]
simsimd = ["dep:simsimd"]
parallel = ["dep:rayon"]
python = ["dep:pyo3", "dep:numpy", "hnsw"]
id-compression = ["dep:cnk", "dep:sbits", "cnk/sbits"]

# Serialization. `persistence` enables the same serde pieces as `serde` plus
# the persistence/IO crates; `persistence-bincode` layers the legacy bincode
# dependency on top of `persistence`.
serde = ["dep:serde", "dep:serde_json", "smallvec/serde"]
persistence = [
  "dep:serde",
  "dep:serde_json",
  "smallvec/serde",
  "dep:postcard",
  "dep:bytemuck",
  "dep:memmap2",
  "dep:durability",
  "dep:byteorder",
  "dep:crc32fast",
  "dep:libc",
  "dep:hex",
]
persistence-bincode = ["dep:bincode", "persistence"]

# Quantization. `qntz` re-exports the quantizers from the `qntz` crate;
# `rabitq` and `saq` are kept as separate sub-features so callers can be
# explicit about which quantizer they want.
qntz = ["dep:qntz"]
quantization = ["qntz"]
rabitq = ["qntz", "qntz/rabitq"]
saq = ["qntz", "qntz/ternary"]

# Used only by tests, examples and benches; sorted alphabetically.
[dev-dependencies]
anyhow = "1.0"
criterion = { version = "0.5", features = ["html_reports"] }
memmap2 = "0.9"
proptest = "1.5"
rankops = "0.1.4"
tempfile = "3"
tokio = { version = "1.40", features = ["macros", "rt-multi-thread"] }

# Example targets. No `path` is given: Cargo infers `examples/<name>.rs` from
# `name`. `required-features` makes Cargo skip an example when the listed
# features are not enabled, instead of failing the build.

# Examples gated on features beyond plain `hnsw`.
[[example]]
name = "ivf_pq_demo"
required-features = ["ivf_pq"]

[[example]]
name = "rabitq_demo"
required-features = ["rabitq", "hnsw", "quantization"]

[[example]]
name = "evoc_demo"
required-features = ["evoc"]

# Examples that only need `hnsw`.
[[example]]
name = "sift_benchmark"
required-features = ["hnsw"]

[[example]]
name = "retrieve_and_rerank"
required-features = ["hnsw"]

[[example]]
name = "01_basic_search"
required-features = ["hnsw"]

[[example]]
name = "02_measure_recall"
required-features = ["hnsw"]

[[example]]
name = "03_quick_benchmark"
required-features = ["hnsw"]

[[example]]
name = "04_rigorous_benchmark"
required-features = ["hnsw"]

[[example]]
name = "05_normalization_matters"
required-features = ["hnsw"]

[[example]]
name = "hnsw_benchmark"
required-features = ["hnsw"]

[[example]]
name = "semantic_search_demo"
required-features = ["hnsw"]

[[example]]
name = "lid_demo"
required-features = ["hnsw"]

[[example]]
name = "lid_outlier_detection"
required-features = ["hnsw"]

[[example]]
name = "dual_branch_demo"
required-features = ["hnsw"]

[[example]]
name = "dual_branch_hnsw_demo"
required-features = ["hnsw"]

[[example]]
name = "glove_benchmark"
required-features = ["hnsw"]

[[example]]
name = "ann_benchmark"
required-features = ["hnsw"]

[[example]]
name = "hybrid_search"
required-features = ["hnsw"]

[[example]]
name = "wasm_search"
required-features = ["hnsw"]

[[example]]
name = "embedding_pipeline"
required-features = ["hnsw"]

# Benchmark targets. `harness = false` turns off the built-in libtest bench
# harness so each bench file supplies its own entry point (presumably via
# Criterion, which is listed in [dev-dependencies] — confirm in benches/).
# NOTE(review): unlike the examples, none of these declare `required-features`;
# verify they still build with `--no-default-features`.
[[bench]]
name = "hnsw"
harness = false

[[bench]]
name = "distance"
harness = false

[[bench]]
name = "recall"
harness = false

[[bench]]
name = "memory"
harness = false

[[bench]]
name = "scaling"
harness = false

[lints.rust]
# Lint groups. `priority = -1` orders them before the individual lints below,
# so a per-lint level set here overrides the group's level.
future_incompatible = { level = "warn", priority = -1 }
nonstandard_style = { level = "warn", priority = -1 }
rust_2018_idioms = { level = "warn", priority = -1 }
rust_2021_compatibility = { level = "warn", priority = -1 }

# Cargo infers the valid `feature` values from the [features] table on its
# own; only non-feature cfgs (e.g. target-specific ones) need listing here.
unexpected_cfgs = "warn"

# Individual lints that are enabled.
semicolon_in_expressions_from_macros = "warn"
unsafe_op_in_unsafe_fn = "warn"
unstable_features = "deny"
unused_extern_crates = "warn"
unused_import_braces = "warn"
unused_lifetimes = "warn"
unused_macro_rules = "warn"
variant_size_differences = "warn"

# Documentation lints: temporarily relaxed, to be enabled incrementally.
# missing_docs = "warn"
# missing_doc_code_examples = "warn"

# Deliberately left off, with the reason for each:
# unsafe_code = "warn"  # SIMD and low-level ops require unsafe
# unused_qualifications = "warn"  # explicit paths often improve clarity
# unused_results = "warn"  # HashSet/HashMap inserts often ignore return value
# trivial_numeric_casts = "warn"  # explicit casts help with type clarity

[lints.clippy]
# Lint groups. `priority = -1` orders them before the individual lints below,
# so a per-lint level set here overrides the group's level.
correctness = { level = "deny", priority = -1 }
cargo = { level = "warn", priority = -1 }
complexity = { level = "warn", priority = -1 }
perf = { level = "warn", priority = -1 }
style = { level = "warn", priority = -1 }
suspicious = { level = "warn", priority = -1 }
restriction = { level = "allow", priority = -1 }

# Individually denied.
empty_loop = "deny"

# Individually warned.
expect_used = "warn"
large_enum_variant = "warn"
mutex_atomic = "warn"
never_loop = "warn"
unwrap_in_result = "warn"
unwrap_used = "warn"

# Individually silenced.
doc_markdown = "allow"
module_name_repetitions = "allow"
multiple_crate_versions = "allow"
too_many_arguments = "allow"
type_complexity = "allow"
wrong_self_convention = "allow"