aurora_semantic/lib.rs
1//! # Aurora Semantic
2//!
3//! A local, embedded semantic search engine for source code, designed to be
4//! bundled directly inside desktop IDEs.
5//!
6//! ## Features
7//!
8//! - **Workspace Indexing**: Index entire codebases with progress reporting
9//! - **Smart Chunking**: Extract meaningful code segments (functions, classes, etc.)
10//! - **Ignore Rules**: Respect .gitignore and custom patterns
11//! - **Persistent Indexes**: Save and reload indexes efficiently
12//! - **Lexical Search**: Fast keyword-based search using Tantivy
13//! - **Semantic Search**: ONNX-based embedding similarity search
14//! - **Hybrid Search**: Combined lexical and semantic search
15//!
16//! ## Quick Start
17//!
18//! ```rust,ignore
19//! use aurora_semantic::{Engine, EngineConfig, WorkspaceConfig, SearchQuery, ModelConfig};
20//! use std::path::PathBuf;
21//!
22//! #[tokio::main]
23//! async fn main() -> aurora_semantic::Result<()> {
24//!     // Load your ONNX model
25//!     let model = ModelConfig::from_directory("./models/jina-code").load()?;
26//!
27//!     // Create engine
28//!     let config = EngineConfig::new(PathBuf::from(".aurora"));
29//!     let engine = Engine::with_embedder(config, model)?;
30//!
31//!     // Index a workspace
32//!     let ws_config = WorkspaceConfig::new(PathBuf::from("./my-project"));
33//!     let workspace_id = engine.index_workspace(ws_config, None).await?;
34//!
35//!     // Search for code
36//!     let results = engine.search_text(&workspace_id, "authentication")?;
37//!
38//!     for result in results {
39//!         println!("{}: {} (score: {:.2})",
40//!             result.document.relative_path.display(),
41//!             result.chunk.symbol_name.as_deref().unwrap_or("unknown"),
42//!             result.score
43//!         );
44//!     }
45//!
46//!     Ok(())
47//! }
48//! ```
49//!
50//! ## Using Your Own ONNX Model
51//!
52//! Aurora uses ONNX Runtime for embedding generation. To use semantic search:
53//!
54//! 1. Download an ONNX model (e.g., `jina-embeddings-v2-base-code`)
55//! 2. Place `model.onnx` and `tokenizer.json` in a directory
56//! 3. Point Aurora to that directory
57//!
58//! ```rust,ignore
59//! use aurora_semantic::{ModelConfig, OnnxEmbedder};
60//!
61//! // Load from directory
62//! let embedder = OnnxEmbedder::from_directory("./models/jina-code")?;
63//!
64//! // Or with custom settings
65//! let embedder = ModelConfig::from_directory("./models/jina-code")
66//!     .with_max_length(8192) // Jina supports 8k context
67//!     .load()?;
68//! ```
69
70#![warn(missing_docs)]
71
72// Core modules
73mod config;
74mod error;
75mod types;
76
77// Feature modules
78mod chunker;
79mod embeddings;
80mod engine;
81mod ignore;
82mod search;
83mod storage;
84
85// Re-export public API
86
87// Configuration
88pub use config::{
89 ChunkingConfig, EmbeddingConfig, EngineConfig, IgnoreConfig, PerformanceConfig, SearchConfig,
90 SearchMode, WorkspaceConfig,
91};
92
93// Types
94pub use types::{
95 Chunk, ChunkId, ChunkType, Document, DocumentId, Highlight, IndexPhase, IndexProgress,
96 Language, LanguageStats, MatchType, SearchResult, WorkspaceId, WorkspaceStats,
97};
98
99// Error handling
100pub use error::{Error, Result};
101
102// Engine (main API)
103pub use engine::Engine;
104
105// Search
106pub use search::{SearchFilter, SearchQuery};
107
108// Chunking
109pub use chunker::{Chunker, DefaultChunker};
110
111// Embeddings
112pub use embeddings::{
113 Embedder, ExecutionProviderInfo, HashEmbedder, ModelConfig, OnnxEmbedder,
114 // Jina Code Embeddings 1.5B support
115 EmbeddingMode, EmbeddingTask, JinaCodeConfig, JinaCodeEmbedder, MatryoshkaDimension,
116};
117pub use embeddings::pooling::PoolingStrategy;
118
119// Storage
120pub use storage::{DiskStorage, Storage, WorkspaceMetadata};
121
122// Ignore rules
123pub use ignore::{FileFilter, FileWalker};
124
125/// Library version.
///
/// Resolved by `env!` from the `CARGO_PKG_VERSION` environment variable while the crate is
/// being compiled, so the value is fixed at build time rather than looked up at runtime.
126pub const VERSION: &str = env!("CARGO_PKG_VERSION");