normalize_semantic/lib.rs
1//! Semantic retrieval layer for normalize.
2//!
3//! This crate provides vector embeddings over structurally-derived chunks
4//! (symbols + doc comments + caller/callee context + co-change neighbors),
5//! stored in SQLite alongside the structural index, queryable by meaning
6//! rather than by name.
7//!
8//! ## Architecture
9//!
10//! - **[`config`]** -- `EmbeddingsConfig` (`[embeddings]` section of config.toml)
11//! - **[`chunks`]** -- context window construction from index rows
12//! - **[`embedder`]** -- fastembed wrapper (ONNX, no server required)
13//! - **[`schema`]** -- SQLite DDL for the `embeddings` table
14//! - **[`store`]** -- read/write embeddings to/from SQLite
15//! - **[`search`]** -- ANN search + staleness re-ranking
16//! - **[`populate`]** -- walk the structural index and embed symbols, docs, and commits
17//! - **[`service`]** -- CLI service (`normalize structure search`) -- `cli` feature
18//!
19//! ## Usage
20//!
21//! After `structure rebuild`, call [`populate::populate_embeddings`] with the
22//! active `FileIndex` connection to generate and store embeddings.
23//!
24//! For markdown and commit embeddings, call [`populate::populate_markdown_docs`]
25//! and [`populate::populate_commit_messages`] respectively.
26//! For `.normalize/context/` block embeddings, call [`populate::populate_context_blocks`].
27//!
28//! To search, call [`service::run_search`] (all source types) or
29//! [`service::run_context_search`] (context blocks only), or use
30//! [`store::load_all_embeddings`] + [`search::rerank`] directly.
31
32pub mod chunks;
33pub mod config;
34pub mod embedder;
35pub mod git_staleness;
36pub mod populate;
37pub mod schema;
38pub mod search;
39pub mod store;
40pub mod vec_ext;
41
42#[cfg(feature = "cli")]
43pub mod service;
44
45// Re-export the key public types for convenience.
46pub use config::EmbeddingsConfig;
47pub use populate::{
48 DEFAULT_MAX_COMMITS, PopulateStats, populate_commit_messages, populate_context_blocks,
49 populate_embeddings, populate_incremental_for_paths, populate_markdown_docs,
50};
51pub use search::SearchHit;
52
53use libsql::Connection;
54use normalize_facts::FileIndex;
55
56/// Open the index and return a reference to its SQLite connection.
57/// Convenience helper used by populate and service modules.
58pub async fn open_index(root: &std::path::Path) -> Result<FileIndex, libsql::Error> {
59 let normalize_dir = root.join(".normalize");
60 let db_path = normalize_dir.join("index.sqlite");
61 FileIndex::open(&db_path, root).await
62}
63
64/// Ensure the embeddings schema exists in the given connection.
65/// Safe to call multiple times (all DDL uses `IF NOT EXISTS`).
66pub async fn ensure_schema(conn: &Connection) -> Result<(), libsql::Error> {
67 store::ensure_schema(conn).await
68}