vectorless/lib.rs
1// Copyright (c) 2026 vectorless developers
2// SPDX-License-Identifier: Apache-2.0
3
4//! # Vectorless
5//!
6//! **A hierarchical, reasoning-native document intelligence engine.**
7//!
8//! Replace your vector database with LLM-powered tree navigation.
9//! No embeddings. No vector search. Just reasoning.
10//!
11//! ## Overview
12//!
13//! Traditional RAG systems chunk documents into flat vectors, losing structure.
14//! Vectorless preserves your document's hierarchy and uses an LLM to navigate it —
15//! like a human skimming a table of contents, then drilling into relevant sections.
16//!
17//! ## Architecture
18//!
19//! ```text
20//! ┌─────────────────────────────────────────────────────────────────┐
21//! │ client │
22//! │ (Engine, EngineBuilder) │
23//! └────────────────────────────┬────────────────────────────────────┘
24//! │
25//! ┌──────────────────┼──────────────────┐
26//! ▼ ▼ ▼
27//! ┌──────────┐ ┌───────────┐ ┌──────────┐
28//! │ index │ │ retrieval │ │ storage │
29//! │ (write) │ │ (read) │ │ (persist)│
30//! └────┬─────┘ └─────┬─────┘ └────┬─────┘
31//! │ │ │
32//! └───────────┬───────┘ │
33//! ▼ │
34//! ┌───────────┐ │
35//! │ domain │ │
36//! │(Tree/Node)│ │
37//! └─────┬─────┘ │
38//! │ │
39//! ┌──────────────┼──────────────┐ │
40//! ▼ ▼ ▼ │
41//! ┌────────┐ ┌──────────┐ ┌────────┐ │
42//! │ parser │ │ llm │ │ config │◄─────┘
43//! └────────┘ └──────────┘ └────────┘
44//! ```
45//!
46//! ## Features
47//!
48//! - 🌳 **Tree-Based Indexing** — Documents as hierarchical trees, not flat chunks
49//! - 🧠 **LLM Navigation** — Reasoning-based traversal to find relevant content
50//! - 🚀 **Zero Infrastructure** — No vector database, no embedding models
51//! - 📄 **Multi-Format** — Markdown, PDF, DOCX support
52//! - 💾 **Persistent Workspace** — LRU-cached storage with lazy loading
53//! - 🔄 **Retry & Fallback** — Resilient LLM calls with automatic recovery
54//!
55//! ## Quick Start
56//!
57//! ```rust,no_run
58//! use vectorless::{EngineBuilder, Engine};
59//!
60//! #[tokio::main]
61//! async fn main() -> vectorless::domain::Result<()> {
62//! // Create client
63//! let mut client = EngineBuilder::new()
64//! .with_workspace("./workspace")
65//! .build()?;
66//!
67//! // Index a document
68//! let doc_id = client.index("./document.md").await?;
69//!
70//! // Query with natural language
71//! let result = client.query(&doc_id, "What is this about?").await?;
72//! println!("{}", result.content);
73//!
74//! Ok(())
75//! }
76//! ```
77//!
78//! ## Modules
79//!
80//! | Module | Description |
81//! |--------|-------------|
82//! | [`client`] | High-level API (`Engine`, `EngineBuilder`) |
83//! | [`domain`] | Core domain types (`DocumentTree`, `TreeNode`, `NodeId`) |
84//! | [`index`] | Document indexing pipeline |
85//! | [`retrieval`] | Retrieval strategies and search algorithms |
86//! | [`config`] | Configuration management |
87//! | [`llm`] | LLM client with retry & fallback |
88//! | [`parser`] | Document parsers (Markdown, PDF, DOCX) |
89//! | [`storage`] | Workspace persistence |
90//! | [`throttle`] | Rate limiting |
91
92// =============================================================================
93// Modules
94// =============================================================================
95
96pub mod client;
97pub mod config;
98pub mod throttle;
99pub mod domain;
100pub mod index;
101pub mod llm;
102pub mod parser;
103pub mod retrieval;
104pub mod storage;
105
106// =============================================================================
107// Re-exports (Convenience API)
108// =============================================================================
109
110// Client API (most common entry point)
111pub use client::{DocumentInfo, IndexedDocument, Engine, EngineBuilder};
112
113// Domain types
114pub use domain::{
115 Error, Result, NodeId, TreeNode, DocumentTree,
116 DocumentStructure, StructureNode,
117 TocView, TocNode, TocEntry, TocConfig,
118 estimate_tokens, estimate_tokens_fast,
119};
120
121// Configuration
122pub use config::{Config, ConfigLoader, RetrievalConfig, SummaryConfig};
123
124// LLM
125pub use llm::{LlmClient, LlmConfig, LlmConfigs, LlmError, LlmPool, RetryConfig};
126
127// Document parsing
128pub use parser::{DocumentFormat, DocumentParser, DocxParser, MarkdownParser, PdfParser, ParseResult, RawNode};
129
130// Indexing
131pub use index::{
132 PipelineExecutor, PipelineOptions, IndexInput, IndexMode,
133 IndexContext, IndexResult, IndexStage, IndexMetrics,
134 SummaryStrategy, ChangeDetector, ChangeSet, PartialUpdater,
135};
136pub use index::pipeline::{PipelineOrchestrator, CustomStageBuilder};
137
138// Retrieval
139pub use retrieval::{
140 PipelineRetriever, Retriever, RetrieverError, RetrieverResult,
141 RetrieveOptions, RetrieveResponse, RetrievalResult, RetrievalContext,
142 QueryComplexity, StrategyPreference, SufficiencyLevel,
143 ContextBuilder, PruningStrategy, TokenEstimation,
144 NavigationDecision, NavigationStep, SearchPath,
145 format_for_llm, format_for_llm_async, format_tree_for_llm, format_tree_for_llm_async,
146};
147
148// Storage
149pub use storage::{DocumentMeta as StorageDocumentMeta, PersistedDocument, Workspace};
150
151// Throttle
152pub use throttle::{ConcurrencyConfig, ConcurrencyController, RateLimiter};