Skip to main content

vectorless/core/
traits.rs

1// Copyright (c) 2026 vectorless developers
2// SPDX-License-Identifier: Apache-2.0
3
//! Core traits for the vectorless library.
//!
//! This module defines the main extension points of the library:
//! - [`DocumentParser`] - Parse documents into raw nodes
//! - [`Summarizer`] - Generate summaries for tree nodes
//! - [`Retriever`] - Find relevant content in a document tree
9
10use async_trait::async_trait;
11use std::path::Path;
12
13use super::{DocumentTree, NodeId, Result};
14
15// ============================================================
16// Document Parser Trait
17// ============================================================
18
19/// A parser for extracting content from documents.
20///
21/// Implementations parse different document formats and produce
22/// a sequence of raw nodes that can be organized into a tree.
23///
24/// # Example
25///
26/// ```rust
27/// use vectorless::core::DocumentParser;
28/// use vectorless::document::MarkdownParser;
29/// use async_trait::async_trait;
30///
31/// # #[tokio::main]
32/// # async fn main() -> vectorless::core::Result<()> {
33/// let parser = MarkdownParser::new();
34/// let content = "# Title\n\nContent here.";
35/// let result = parser.parse(content).await?;
36/// println!("Found {} nodes", result.node_count());
37/// # Ok(())
38/// # }
39/// ```
40#[async_trait]
41pub trait DocumentParser: Send + Sync {
42    /// Get the document format this parser handles.
43    fn format(&self) -> crate::document::DocumentFormat;
44
45    /// Parse content from a string.
46    ///
47    /// # Arguments
48    ///
49    /// * `content` - The document content as a string
50    ///
51    /// # Returns
52    ///
53    /// A [`ParseResult`] containing extracted nodes and metadata.
54    async fn parse(&self, content: &str) -> Result<crate::document::ParseResult>;
55
56    /// Parse content from a file.
57    ///
58    /// Default implementation reads the file and calls [`parse`](Self::parse).
59    ///
60    /// # Arguments
61    ///
62    /// * `path` - Path to the file
63    async fn parse_file(&self, path: &Path) -> Result<crate::document::ParseResult> {
64        let content = tokio::fs::read_to_string(path)
65            .await
66            .map_err(|e| crate::core::Error::Parse(format!("Failed to read file: {}", e)))?;
67
68        self.parse(&content).await
69    }
70}
71
72// ============================================================
73// Summarizer Trait
74// ============================================================
75
76/// A summarizer generates concise summaries for tree nodes.
77///
78/// Implementations can use different strategies:
79/// - LLM-based summarization
80/// - Extractive summarization
81/// - Hybrid approaches
82///
83/// # Example
84///
85/// ```rust
86/// use vectorless::core::{Summarizer, DocumentTree, NodeId, Result};
87/// use async_trait::async_trait;
88///
89/// struct MySummarizer;
90///
91/// #[async_trait]
92/// impl Summarizer for MySummarizer {
93///     async fn summarize(&self, tree: &DocumentTree, node: NodeId) -> Result<String> {
94///         let content = tree.get(node)
95///             .map(|n| n.content.as_str())
96///             .unwrap_or("");
97///         Ok(format!("Summary: {}", &content[..50.min(content.len())]))
98///     }
99/// }
100/// ```
101#[async_trait]
102pub trait Summarizer: Send + Sync {
103    /// Generate a summary for the given node.
104    ///
105    /// # Arguments
106    ///
107    /// * `tree` - The document tree containing the node
108    /// * `node` - The node to summarize
109    ///
110    /// # Returns
111    ///
112    /// A summary string, or an error if summarization fails.
113    async fn summarize(&self, tree: &DocumentTree, node: NodeId) -> Result<String>;
114}
115
116// ============================================================
117// Retriever Trait
118// ============================================================
119
120/// A retriever finds relevant content in a document tree.
121///
122/// Implementations can use different strategies:
123/// - LLM-based navigation (tree traversal)
124/// - MCTS (Monte Carlo Tree Search)
125/// - Beam search
126/// - Vector similarity
127///
128/// # Example
129///
130/// ```rust
131/// use vectorless::core::{Retriever, DocumentTree, Result};
132/// use vectorless::retriever::RetrieveOptions;
133/// use async_trait::async_trait;
134///
135/// struct MyRetriever;
136///
137/// #[async_trait]
138/// impl Retriever for MyRetriever {
139///     async fn retrieve(&self, tree: &DocumentTree, query: &str, options: &RetrieveOptions) -> Result<Vec<RetrievalResult>> {
140///         // Return relevant content
141///         Ok(vec![RetrievalResult::new("Relevant content")])
142///     }
143/// }
144/// ```
145#[async_trait]
146pub trait Retriever: Send + Sync {
147    /// Retrieve relevant content for a query.
148    ///
149    /// # Arguments
150    ///
151    /// * `tree` - The document tree to search
152    /// * `query` - The user's question
153    /// * `options` - Retrieval options
154    ///
155    /// # Returns
156    ///
157    /// A list of retrieval results with content, scores, and metadata.
158    async fn retrieve(
159        &self,
160        tree: &DocumentTree,
161        query: &str,
162        options: &crate::retriever::RetrieveOptions,
163    ) -> Result<Vec<crate::retriever::RetrievalResult>>;
164}
165
166// ============================================================
167// Configuration Types
168// ============================================================
169
/// Configuration for summarization behavior.
///
/// Derives `PartialEq`/`Eq` so two configurations can be compared by value
/// (for example, to detect whether settings differ from the defaults).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SummarizerConfig {
    /// Maximum tokens for the summary.
    pub max_tokens: usize,

    /// Whether to include child content in summaries.
    pub include_children: bool,

    /// Minimum content length to trigger summarization.
    pub min_content_length: usize,
}
182
183impl Default for SummarizerConfig {
184    fn default() -> Self {
185        Self {
186            max_tokens: 200,
187            include_children: false,
188            min_content_length: 100,
189        }
190    }
191}