vectorless/retrieval/retriever.rs
1// Copyright (c) 2026 vectorless developers
2// SPDX-License-Identifier: Apache-2.0
3
4//! Core Retriever trait and related types.
5
6use async_trait::async_trait;
7
8use super::types::{RetrieveOptions, RetrieveResponse};
9use crate::document::DocumentTree;
10
11/// Result type for retriever operations.
12pub type RetrieverResult<T> = Result<T, RetrieverError>;
13
14/// Errors that can occur during retrieval.
15#[derive(Debug, thiserror::Error)]
16pub enum RetrieverError {
17 /// The document tree is empty or invalid.
18 #[error("Invalid document tree: {0}")]
19 InvalidTree(String),
20
21 /// No relevant nodes found for the query.
22 #[error("No relevant nodes found for query")]
23 NoResults,
24
25 /// LLM call failed during retrieval.
26 #[error("LLM error: {0}")]
27 LlmError(String),
28
29 /// Embedding generation failed.
30 #[error("Embedding error: {0}")]
31 EmbeddingError(String),
32
33 /// Cache operation failed.
34 #[error("Cache error: {0}")]
35 CacheError(String),
36
37 /// Configuration error.
38 #[error("Configuration error: {0}")]
39 ConfigError(String),
40
41 /// Internal error during retrieval.
42 #[error("Internal error: {0}")]
43 Internal(String),
44}
45
46/// Trait for document retrieval strategies.
47///
48/// Implementations provide different approaches to navigating
49/// the document tree and finding relevant content.
50#[async_trait]
51pub trait Retriever: Send + Sync {
52 /// Retrieve relevant content for the given query.
53 ///
54 /// # Arguments
55 ///
56 /// * `tree` - The document tree to search
57 /// * `query` - The user's query string
58 /// * `options` - Retrieval options controlling behavior
59 ///
60 /// # Returns
61 ///
62 /// A `RetrieveResponse` containing the retrieved content and metadata.
63 async fn retrieve(
64 &self,
65 tree: &DocumentTree,
66 query: &str,
67 options: &RetrieveOptions,
68 ) -> RetrieverResult<RetrieveResponse>;
69
70 /// Get the name of this retriever for logging/debugging.
71 fn name(&self) -> &str;
72
73 /// Check if this retriever supports the given options.
74 ///
75 /// Some retrievers may not support all features (e.g., sufficiency checking).
76 fn supports_options(&self, _options: &RetrieveOptions) -> bool {
77 true
78 }
79
80 /// Estimate the cost of a retrieval operation.
81 ///
82 /// Returns an estimated number of LLM calls or tokens that will be used.
83 /// Useful for cost-aware strategy selection.
84 fn estimate_cost(&self, tree: &DocumentTree, options: &RetrieveOptions) -> CostEstimate {
85 let node_count = tree.node_count();
86 CostEstimate {
87 llm_calls: node_count / 2, // Rough estimate
88 tokens: node_count * 100,
89 }
90 }
91}
92
/// Cost estimate for a retrieval operation.
///
/// A plain value type: it is `Copy`, defaults to all zeros, and can be
/// compared for equality so estimates can be checked in tests and compared
/// between strategies.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CostEstimate {
    /// Estimated number of LLM calls.
    pub llm_calls: usize,
    /// Estimated number of tokens.
    pub tokens: usize,
}
101
/// Mutable bookkeeping handed to strategies while a retrieval is in progress.
///
/// Holds the query in several pre-processed forms together with running
/// counters that strategies update as they walk the tree.
#[derive(Clone, Debug)]
pub struct RetrievalContext {
    /// The query exactly as supplied by the caller.
    pub query: String,
    /// Lowercased copy of the query, used for matching.
    pub query_normalized: String,
    /// Whitespace-separated tokens of the lowercased query.
    pub query_tokens: Vec<String>,
    /// Current depth in the tree.
    pub current_depth: usize,
    /// Number of results collected so far.
    pub results_count: usize,
    /// Total tokens collected so far.
    pub tokens_collected: usize,
    /// Maximum tokens allowed.
    pub max_tokens: usize,
    /// Whether sufficiency checking is enabled.
    pub sufficiency_enabled: bool,
}

impl RetrievalContext {
    /// Build a fresh context for `query`.
    ///
    /// The query is stored verbatim, lowercased into `query_normalized`, and
    /// that lowercased form is split on whitespace into `query_tokens`.
    /// Depth, result count and collected tokens all start at zero.
    pub fn new(query: &str, max_tokens: usize, sufficiency_enabled: bool) -> Self {
        let lowered = query.to_lowercase();
        let tokens: Vec<String> = lowered.split_whitespace().map(str::to_owned).collect();

        Self {
            query: query.to_owned(),
            query_normalized: lowered,
            query_tokens: tokens,
            current_depth: 0,
            results_count: 0,
            tokens_collected: 0,
            max_tokens,
            sufficiency_enabled,
        }
    }

    /// Report whether the collected tokens have reached the budget.
    ///
    /// A budget of zero counts as reached immediately.
    pub fn is_token_limit_reached(&self) -> bool {
        self.max_tokens <= self.tokens_collected
    }

    /// Fraction of the token budget consumed so far, in `[0.0, 1.0]`.
    ///
    /// The value is capped at `1.0` once the budget is exceeded. A budget of
    /// zero yields `0.0` rather than dividing by zero.
    pub fn token_utilization(&self) -> f32 {
        match self.max_tokens {
            0 => 0.0,
            budget => {
                let ratio = self.tokens_collected as f32 / budget as f32;
                ratio.min(1.0)
            }
        }
    }
}