vectorless 0.1.23

Hierarchical, reasoning-native document intelligence engine
Documentation
// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0

//! Tree node definition using indextree (Arena-based).
//!
//! This module provides a node type for hierarchical document representation.
//! Each branch represents a section and each leaf contains the actual text.

use indextree::NodeId as IndexTreeNodeId;
use serde::{Deserialize, Serialize};
use std::fmt;

use super::reference::NodeReference;

/// Unique identifier for a node in the document tree.
///
/// This is a newtype wrapper around indextree's `NodeId`, so that tree
/// handles used by this crate are a distinct type from the raw arena ID and
/// can carry their own trait implementations.
///
/// The wrapped ID is exposed as the public field `.0` for callers that need
/// to talk to the underlying indextree arena directly.
///
/// The type is `Copy`, and derives `PartialEq`, `Eq` and `Hash`, so it can be
/// passed by value and used as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeId(pub IndexTreeNodeId);

// Implement traits for interoperability
impl fmt::Display for NodeId {
    /// Renders the identifier as `NodeId(<inner>)`, where `<inner>` is the
    /// `Debug` representation of the wrapped indextree ID.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let inner = &self.0;
        // Formatting the inner ID through `format_args!` keeps it on default
        // formatting options, independent of any flags set on `f`.
        f.write_fmt(format_args!("NodeId({:?})", inner))
    }
}

impl Serialize for NodeId {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        self.0.serialize(serializer)
    }
}

impl<'de> Deserialize<'de> for NodeId {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let id = IndexTreeNodeId::deserialize(deserializer)?;
        Ok(NodeId(id))
    }
}

/// A node in the Vectorless document tree.
///
/// Each branch represents a section and each leaf contains the actual text.
/// When a question is asked, an LLM navigates this tree level by level
/// to find the right answer.
///
/// The struct is plain data: it holds the node's payload only. Parent/child
/// links are not stored here.
///
/// NOTE(review): fields marked `#[serde(default)]` fall back to the field
/// type's `Default` value when absent from the input (empty string, `0`,
/// empty `Vec`). For `start_index` and `end_index` that fallback is `0`,
/// whereas `TreeNode::default()` sets both to `1` — confirm whether `0` is
/// meant to signal "unset" for these 1-based values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TreeNode {
    /// Title of this section.
    ///
    /// This is the only non-optional field without `#[serde(default)]`, so it
    /// must be present in serialized input.
    pub title: String,

    /// Hierarchical structure index (e.g., "1", "1.1", "1.2.3").
    ///
    /// This provides a human-readable path to the node and is useful for:
    /// - LLM navigation (easier to understand "go to section 2.1.3")
    /// - Table of contents display
    /// - Cross-referencing
    #[serde(default)]
    pub structure: String,

    /// Raw text content (populated at leaves).
    #[serde(default)]
    pub content: String,

    /// LLM-generated summary of this node.
    #[serde(default)]
    pub summary: String,

    /// Depth in tree (0 = root, 1 = section, 2 = subsection, etc.).
    #[serde(default)]
    pub depth: usize,

    /// Starting line number (1-based).
    #[serde(default)]
    pub start_index: usize,

    /// Ending line number (1-based).
    #[serde(default)]
    pub end_index: usize,

    /// Starting page number (1-based, if applicable).
    pub start_page: Option<usize>,

    /// Ending page number (1-based, if applicable).
    pub end_page: Option<usize>,

    /// Unique node identifier (e.g., "0001", "0002").
    ///
    /// This is a string label stored on the node itself; it is a different
    /// thing from the arena handle type `NodeId`.
    pub node_id: Option<String>,

    /// Physical index marker for line tracking.
    pub physical_index: Option<String>,

    /// Token count estimate.
    pub token_count: Option<usize>,

    /// References found in this node's content.
    ///
    /// These are in-document references like "see Appendix G" or
    /// "refer to Table 5.3" that can be followed during retrieval.
    #[serde(default)]
    pub references: Vec<NodeReference>,
}

impl Default for TreeNode {
    fn default() -> Self {
        Self {
            title: String::new(),
            structure: String::new(),
            content: String::new(),
            summary: String::new(),
            depth: 0,
            start_index: 1,
            end_index: 1,
            start_page: None,
            end_page: None,
            node_id: None,
            physical_index: None,
            token_count: None,
            references: Vec::new(),
        }
    }
}