Skip to main content

veles_core/
lib.rs

1//! `veles-core` — fast, hybrid (BM25 + semantic) local code search.
2//!
3//! `veles-core` is the indexing and search engine that powers the [Veles]
4//! CLI, MCP server, and gRPC service. It walks a directory, chunks source
5//! files, builds a BM25 inverted index plus a dense
6//! [`model2vec-rs`][model2vec] embedding index, and serves hybrid queries
7//! using Reciprocal Rank Fusion. Tree-sitter is used to extract
8//! definitions for symbol-level lookups.
9//!
10//! Design goals:
11//!
12//! - **No GPU, no transformer forward pass at query time.** Embeddings
13//!   come from a static [model2vec] model, so query latency stays in
14//!   the tens of milliseconds on CPU.
15//! - **Persistent on-disk index.** Indexes live under `<repo>/.veles/`
16//!   and support incremental updates that reuse embeddings of unchanged
17//!   files.
18//! - **Pure Rust.** No Python interpreter, no protobuf compiler, no
19//!   native ML runtime — `cargo build --release` is enough.
20//!
21//! # Quick start
22//!
23//! ```no_run
24//! use std::path::Path;
25//! use veles_core::{SearchMode, VelesIndex};
26//!
27//! # fn main() -> anyhow::Result<()> {
28//! // Build an index from a directory. The first call downloads the
29//! // default embedding model (~64 MB) into the HuggingFace cache.
30//! let index = VelesIndex::from_path(Path::new("."), None, None, false)?;
31//!
32//! // Hybrid (BM25 + semantic) search — the default for most queries.
33//! let results = index.search(
34//!     "parse config file",
35//!     5,
36//!     SearchMode::Hybrid,
37//!     None,  // alpha — auto-detect from query type
38//!     None,  // language filter
39//!     None,  // path filter
40//! );
41//!
42//! for r in results {
43//!     println!("{} [{:.3}]", r.chunk.location(), r.score);
44//! }
45//! # Ok(())
46//! # }
47//! ```
48//!
49//! # Persistence
50//!
51//! Indexes can be saved to and loaded from `<repo>/.veles/`:
52//!
53//! ```no_run
54//! # use std::path::Path;
55//! # use veles_core::VelesIndex;
56//! # fn main() -> anyhow::Result<()> {
57//! let repo = Path::new(".");
58//! let index = VelesIndex::from_path(repo, None, None, false)?;
59//! index.save(repo)?;
60//!
61//! // Later, reload without re-embedding:
62//! let model = veles_core::model::load_model(None)?;
63//! let mut reloaded = VelesIndex::load(repo, model)?;
64//!
65//! // Refresh files that changed on disk; unchanged files keep their
66//! // embeddings. A bare `touch` (new mtime, identical bytes) only
67//! // refreshes the manifest, via the BLAKE3 `content_hash` fallback.
68//! let report = reloaded.update_from_path(repo)?;
69//! eprintln!("{} added, {} modified, {} removed, {} mtime-only",
70//!     report.added_files, report.modified_files,
71//!     report.removed_files, report.mtime_refreshed_files);
72//! # Ok(())
73//! # }
74//! ```
75//!
76//! # Module overview
77//!
78//! - [`veles_index`] — the main [`VelesIndex`] type combining BM25, dense,
79//!   symbols, and persistence.
80//! - [`chunker`] — line-based source chunking with overlap.
81//! - [`tokenizer`] — identifier-aware tokenizer (camelCase, snake_case,
82//!   Cyrillic, CJK).
83//! - [`index`] — sparse ([`index::sparse`]) and dense
84//!   ([`index::dense`]) indexes, [`index::search`] entry points, and
85//!   [`index::topk`] selection.
86//! - [`ranking`] — query-type detection, definition boosts, file-path
87//!   penalties, and file-saturation decay.
88//! - [`symbols`] — tree-sitter symbol extraction for Rust, Python,
89//!   JavaScript, TypeScript, and Go.
90//! - [`persist`] — on-disk format under `.veles/`.
91//! - [`walker`] — `.gitignore`-aware file walker (built on
92//!   [`ignore`]).
93//! - [`model`] — wrapper around [`model2vec-rs`][model2vec] for loading
94//!   the default and multilingual static embedding models.
95//!
96//! [Veles]: https://github.com/julymetodiev/Veles
97//! [model2vec]: https://github.com/MinishLab/model2vec-rs
98
99pub mod cache;
100pub mod chunker;
101pub mod filter;
102pub mod index;
103pub mod model;
104pub mod persist;
105pub mod ranking;
106pub mod scope;
107pub mod symbols;
108pub mod tokenizer;
109pub mod types;
110pub mod veles_index;
111pub mod walker;
112
113// Re-export the main types.
114pub use types::{Chunk, IndexStats, SearchMode, SearchResult};
115pub use veles_index::VelesIndex;