Skip to main content

apohara_indexer/
lib.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2
3//! Apohara code indexer (soft-fork): sqlite-vec storage + blake3
4//! feature-hashing embeddings + tree-sitter parsing.
5//!
6//! This crate is a LIB-ONLY soft-fork of `apohara-indexer` from
7//! SuarezPM/Apohara-Catalyst. The storage, parser, and embeddings modules are
8//! preserved verbatim; the binary entry point (`main.rs`) is intentionally
9//! dropped.
10//!
//! The new engine modules were added on top of that base in steps. Step 1
//! added `walker` (gitignore-aware traversal) and `chunker`
//! (symbol/module/window chunking); `schema`, `search`, and `incremental`
//! followed and are now declared and re-exported below.
14
15pub mod chunker;
16pub mod embedder;
17/// Real EmbeddingGemma forward-pass backend; compiled ONLY behind `gguf-embed`.
18#[cfg(feature = "gguf-embed")]
19pub mod embedder_gemma;
20pub mod embeddings;
21pub mod incremental;
22pub mod parser;
23pub mod registry;
24pub mod schema;
25pub mod search;
26pub mod storage;
27pub mod tokens;
28pub mod walker;
29
30pub use storage::{
31    ensure_vec_extension_registered, insert_chunk, insert_chunk_full, insert_chunk_full_with,
32    knn_query, knn_query_with, open_db, open_db_with, write_file_structural, IndexedChunk, KnnHit,
33    SymbolData, EMBED_DIM,
34};
35
36pub use embedder::{
37    active_embedder, resolve_embedder_choice, Embedder, EmbedderChoice, FeatureHashEmbedder,
38    EMBED_MODEL_ENV, FEATURE_HASH_ID,
39};
40
41#[cfg(feature = "gguf-embed")]
42pub use embedder_gemma::{EmbeddingGemmaEmbedder, EMBEDDINGGEMMA_ID};
43
44pub use registry::{
45    load as load_registry, register, registry_path, save as save_registry, Registry,
46};
47
48pub use schema::{
49    migrate, read_embedder_meta, verify_embedder_meta, write_embedder_meta, META_EMBEDDER_DIM,
50    META_EMBEDDER_ID, META_SCHEMA_VERSION, MIGRATION_PLACEHOLDER_REPO_ID, SCHEMA_VERSION,
51};
52
53pub use search::{
54    apply_structural_boost, bm25_query, classify_query_weights, dedup_content, dedup_overlapping,
55    hydrate, load_embeddings, mmr_rerank, resolve_weights, rrf_fuse, rrf_fuse_weighted,
56    vector_query, vector_query_with, ExportRow, HydratedHit, ImportRow, MMR_LAMBDA, RRF_K,
57    STRUCTURAL_BOOST,
58};
59
60pub use tokens::code_tokens;
61
62pub use embeddings::feature_hash_embed;
63
64pub use parser::{
65    detect_language, parse_file, parse_imports_exports, parse_source, parse_source_imports_exports,
66    parse_source_spans, ExportStatement, FunctionSignature, ImportStatement, Language, SymbolKind,
67};
68
69pub use walker::{walk_repo, WalkedFile};
70
71pub use chunker::{chunk_file, chunk_id, ChunkKind, ChunkSpec};
72
73pub use incremental::{index_repo, index_repo_with, reindex, reindex_with, ReindexReport};
74
75/// Bundled SQLite version string (e.g. `"3.46.0"`).
76///
77/// Exposed so the `apohara-codesearch` binary can report the version compiled
78/// into rusqlite's `bundled` feature without taking a direct rusqlite dep.
79pub fn sqlite_version() -> &'static str {
80    rusqlite::version()
81}