vectorless 0.1.30

Reasoning-native document intelligence engine for AI
Documentation
// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0

//! Internal intermediate type produced by the indexing pipeline.
//!
//! [`IndexedDocument`] is an internal-only type that carries data from
//! [`IndexerClient`](super::indexer::IndexerClient) to [`Engine`](super::Engine).
//! It is **not** part of the public API.

use std::path::PathBuf;

use crate::document::DocumentTree;
use crate::index::parse::DocumentFormat;
use crate::metrics::IndexMetrics;
use crate::storage::PageContent;

/// An indexed document with its tree structure and metadata.
///
/// Internal intermediate produced by the indexing pipeline and consumed
/// by [`Engine`](super::Engine) to create a [`PersistedDocument`](crate::storage::PersistedDocument).
///
/// Only `id` and `format` are required at construction time (see
/// [`IndexedDocument::new`]); `name` starts as an empty string, `pages`
/// starts as an empty vector, and every `Option` field starts as `None`.
/// The type is `pub(crate)`, so its `pub` fields are reachable only from
/// within this crate.
#[derive(Debug, Clone)]
pub(crate) struct IndexedDocument {
    /// Unique document identifier.
    pub id: String,

    /// Document format.
    pub format: DocumentFormat,

    /// Document name/title.
    ///
    /// Empty until set via [`IndexedDocument::with_name`].
    pub name: String,

    /// Document description (generated by LLM).
    ///
    /// `None` until set via [`IndexedDocument::with_description`].
    pub description: Option<String>,

    /// Source file path.
    ///
    /// `None` until set via [`IndexedDocument::with_source_path`].
    pub source_path: Option<PathBuf>,

    /// Page count (for PDFs).
    ///
    /// `None` until set via [`IndexedDocument::with_page_count`].
    pub page_count: Option<usize>,

    /// The document tree structure.
    ///
    /// `None` until set via [`IndexedDocument::with_tree`].
    pub tree: Option<DocumentTree>,

    /// Per-page content (for PDFs).
    ///
    /// Starts empty. No `with_*` builder for this field is defined in the
    /// `impl` block of this file, so it is presumably assigned directly by
    /// the producer (verify against callers).
    pub pages: Vec<PageContent>,

    /// Indexing pipeline metrics.
    ///
    /// `None` until set via [`IndexedDocument::with_metrics`].
    pub metrics: Option<IndexMetrics>,

    /// Pre-computed reasoning index for retrieval acceleration.
    ///
    /// Starts as `None`; no `with_*` builder for it is defined in this file.
    pub reasoning_index: Option<crate::document::ReasoningIndex>,

    /// Pre-computed navigation index for agent-based retrieval.
    ///
    /// Starts as `None`; no `with_*` builder for it is defined in this file.
    pub navigation_index: Option<crate::document::NavigationIndex>,
}

impl IndexedDocument {
    /// Build an [`IndexedDocument`] from just an identifier and a format.
    ///
    /// Everything else starts out blank: `name` is an empty string, `pages`
    /// is an empty vector, and all optional fields are `None`. Chain the
    /// `with_*` methods below to populate them.
    pub fn new(id: impl Into<String>, format: DocumentFormat) -> Self {
        let id = id.into();
        Self {
            // Required identity.
            id,
            format,
            // Non-optional fields start empty.
            name: String::new(),
            pages: Vec::new(),
            // Optional metadata, filled in later by the builder methods
            // or by direct field assignment.
            description: None,
            source_path: None,
            page_count: None,
            tree: None,
            metrics: None,
            reasoning_index: None,
            navigation_index: None,
        }
    }

    /// Return this document with `name` as its name/title.
    pub fn with_name(self, name: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            ..self
        }
    }

    /// Return this document with `desc` stored as its description.
    pub fn with_description(self, desc: impl Into<String>) -> Self {
        Self {
            description: Some(desc.into()),
            ..self
        }
    }

    /// Return this document with `path` recorded as its source path.
    pub fn with_source_path(self, path: impl Into<PathBuf>) -> Self {
        Self {
            source_path: Some(path.into()),
            ..self
        }
    }

    /// Return this document with `count` recorded as its page count.
    pub fn with_page_count(self, count: usize) -> Self {
        Self {
            page_count: Some(count),
            ..self
        }
    }

    /// Return this document with `tree` attached as its document tree.
    pub fn with_tree(self, tree: DocumentTree) -> Self {
        Self {
            tree: Some(tree),
            ..self
        }
    }

    /// Return this document with `metrics` attached as its indexing metrics.
    pub fn with_metrics(self, metrics: IndexMetrics) -> Self {
        Self {
            metrics: Some(metrics),
            ..self
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builder calls populate their own fields and leave every other field
    /// at the default assigned by `IndexedDocument::new`.
    #[test]
    fn test_indexed_document() {
        let doc = IndexedDocument::new("doc-1", DocumentFormat::Markdown)
            .with_name("Test Document")
            .with_description("A test document");

        assert_eq!(doc.id, "doc-1");
        assert_eq!(doc.name, "Test Document");
        // The description is set above, so verify it was actually stored.
        assert_eq!(doc.description.as_deref(), Some("A test document"));

        // Fields not touched by the builder chain keep their defaults.
        assert!(doc.source_path.is_none());
        assert!(doc.page_count.is_none());
        assert!(doc.tree.is_none());
        assert!(doc.pages.is_empty());
        assert!(doc.metrics.is_none());
        assert!(doc.reasoning_index.is_none());
        assert!(doc.navigation_index.is_none());
    }

    /// `with_source_path` and `with_page_count` store their arguments.
    #[test]
    fn test_source_path_and_page_count() {
        let doc = IndexedDocument::new("doc-2", DocumentFormat::Markdown)
            .with_source_path("docs/readme.md")
            .with_page_count(3);

        assert_eq!(
            doc.source_path.as_deref(),
            Some(std::path::Path::new("docs/readme.md"))
        );
        assert_eq!(doc.page_count, Some(3));
        // The name was never set, so it stays at the empty default.
        assert!(doc.name.is_empty());
    }
}