Skip to main content

veles_core/
lib.rs

1//! `veles-core` — fast, hybrid (BM25 + semantic) local code search.
2//!
3//! `veles-core` is the indexing and search engine that powers the [Veles]
4//! CLI, MCP server, and gRPC service. It walks a directory, chunks source
5//! files, builds a BM25 inverted index plus a dense
6//! [`model2vec-rs`][model2vec] embedding index, and serves hybrid queries
7//! using Reciprocal Rank Fusion. Tree-sitter is used to extract
8//! definitions for symbol-level lookups.
9//!
10//! Design goals:
11//!
12//! - **No GPU, no transformer forward pass at query time.** Embeddings
13//!   come from a static [model2vec] model, so query latency stays in
14//!   the tens of milliseconds on CPU.
15//! - **Persistent on-disk index.** Indexes live under `<repo>/.veles/`
16//!   and support incremental updates that reuse embeddings of unchanged
17//!   files.
18//! - **Pure Rust.** No Python interpreter, no protobuf compiler, no
19//!   native ML runtime — `cargo build --release` is enough.
20//!
21//! # Quick start
22//!
23//! ```no_run
24//! use std::path::Path;
25//! use veles_core::{SearchMode, VelesIndex};
26//!
27//! # fn main() -> anyhow::Result<()> {
28//! // Build an index from a directory. The first call downloads the
29//! // default embedding model (~64 MB) into the Hugging Face cache.
30//! let index = VelesIndex::from_path(Path::new("."), None, None, false)?;
31//!
32//! // Hybrid (BM25 + semantic) search — the default for most queries.
33//! let results = index.search(
34//!     "parse config file",
35//!     5,
36//!     SearchMode::Hybrid,
37//!     None,  // alpha — auto-detect from query type
38//!     None,  // language filter
39//!     None,  // path filter
40//! );
41//!
42//! for r in results {
43//!     println!("{} [{:.3}]", r.chunk.location(), r.score);
44//! }
45//! # Ok(())
46//! # }
47//! ```
48//!
49//! # Persistence
50//!
51//! Indexes can be saved to and loaded from `<repo>/.veles/`:
52//!
53//! ```no_run
54//! # use std::path::Path;
55//! # use veles_core::VelesIndex;
56//! # fn main() -> anyhow::Result<()> {
57//! let repo = Path::new(".");
58//! let index = VelesIndex::from_path(repo, None, None, false)?;
59//! index.save(repo)?;
60//!
61//! // Later, reload without re-embedding:
62//! let model = veles_core::model::load_model(None)?;
63//! let mut reloaded = VelesIndex::load(repo, model)?;
64//!
65//! // Refresh files that changed on disk; unchanged files keep their
66//! // embeddings.
67//! let report = reloaded.update_from_path(repo)?;
68//! eprintln!("{} added, {} modified, {} removed",
69//!     report.added_files, report.modified_files, report.removed_files);
70//! # Ok(())
71//! # }
72//! ```
73//!
74//! # Module overview
75//!
76//! - [`veles_index`] — the main [`VelesIndex`] type combining the BM25
77//!   index, the dense index, symbol lookup, and persistence.
78//! - [`chunker`] — line-based source chunking with overlap.
79//! - [`tokenizer`] — identifier-aware tokenizer (camelCase, snake_case,
80//!   Cyrillic, CJK).
81//! - [`index`] — sparse ([`index::sparse`]) and dense
82//!   ([`index::dense`]) indexes, [`index::search`] entry points, and
83//!   [`index::topk`] selection.
84//! - [`ranking`] — query-type detection, definition boosts, file-path
85//!   penalties, file-saturation decay.
86//! - [`symbols`] — tree-sitter symbol extraction for Rust, Python,
87//!   JavaScript, TypeScript, and Go.
88//! - [`persist`] — on-disk format under `.veles/`.
89//! - [`walker`] — `.gitignore`-aware file walker (built on
90//!   [`ignore`]).
91//! - [`model`] — wrapper around [`model2vec-rs`][model2vec] for loading
92//!   the default and multilingual static embedding models.
93//!
94//! [Veles]: https://github.com/julymetodiev/Veles
95//! [model2vec]: https://github.com/MinishLab/model2vec-rs
96
97pub mod chunker;
98pub mod filter;
99pub mod index;
100pub mod model;
101pub mod persist;
102pub mod ranking;
103pub mod scope;
104pub mod symbols;
105pub mod tokenizer;
106pub mod types;
107pub mod veles_index;
108pub mod walker;
109
110// Re-export the main types.
111pub use types::{Chunk, IndexStats, SearchMode, SearchResult};
112pub use veles_index::VelesIndex;