use serde::{Deserialize, Serialize};
use crate::document::DocumentTree;
use super::budget::SelectedContent;
use super::config::OutputFormatConfig;
/// Rendering format produced by [`StructureBuilder`].
///
/// The variant selects which private renderer `StructureBuilder::build`
/// dispatches to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OutputFormat {
    /// Markdown sections with `#` headings; this is the default format.
    #[default]
    Markdown,
    /// Pretty-printed JSON document.
    Json,
    /// Indented text outline, accompanied by a structured tree.
    Tree,
    /// Plain `[title] content` paragraphs.
    Flat,
}
impl From<OutputFormatConfig> for OutputFormat {
fn from(config: OutputFormatConfig) -> Self {
match config {
OutputFormatConfig::Markdown => Self::Markdown,
OutputFormatConfig::Json => Self::Json,
OutputFormatConfig::Tree => Self::Tree,
OutputFormatConfig::Flat => Self::Flat,
}
}
}
/// One node of the hierarchical view built for the tree output format.
///
/// Nodes are created from selected content (title, content and score are
/// copied over) or as a synthetic container such as the "Content" root.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentTreeNode {
/// Title shown on the node's outline line.
pub title: String,
/// Body text of the node; `None` until `with_content` is called.
pub content: Option<String>,
/// Score attached to the node; `0.0` until `with_content` is called.
pub score: f32,
/// Child nodes, in the order they were added.
pub children: Vec<ContentTreeNode>,
}
impl ContentTreeNode {
    /// Creates a childless node titled `title`, with no content and a score of `0.0`.
    #[must_use]
    pub fn new(title: String) -> Self {
        let content = None;
        let children = Vec::new();
        Self {
            title,
            content,
            score: 0.0,
            children,
        }
    }

    /// Returns the node with its content and score replaced.
    ///
    /// The title and any existing children are carried over unchanged.
    #[must_use]
    pub fn with_content(self, content: String, score: f32) -> Self {
        Self {
            content: Some(content),
            score,
            ..self
        }
    }

    /// Appends `child` after any existing children.
    pub fn add_child(&mut self, child: ContentTreeNode) {
        self.children.push(child);
    }
}
/// A tree of content nodes together with a node count.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentTree {
/// Root node of the tree.
pub root: ContentTreeNode,
/// Node count recorded for the tree. `ContentTree::new` sets this to 1;
/// the tree-format builder in this file stores the number of nodes below
/// its synthetic root instead.
pub total_nodes: usize,
}
impl ContentTree {
    /// Wraps `root` in a tree whose `total_nodes` is initialised to 1.
    ///
    /// The count is not derived from `root`: it is 1 even when `root`
    /// already has children.
    #[must_use]
    pub fn new(root: ContentTreeNode) -> Self {
        let total_nodes = 1;
        Self { root, total_nodes }
    }
}
/// Summary statistics over the content handed to `StructureBuilder::build`.
///
/// The derived `Default` (all zeros) is what `build` returns for an empty
/// selection.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ContentMetadata {
/// Sum of the `tokens` values of all selected items.
pub total_tokens: usize,
/// Number of selected items.
pub node_count: usize,
/// Arithmetic mean of the selected items' scores.
pub avg_score: f32,
/// Largest `depth` among the selected items.
pub max_depth: usize,
}
/// Result of `StructureBuilder::build`: rendered text plus optional structure.
#[derive(Debug, Clone)]
pub struct StructuredContent {
/// Rendered output in the builder's format; empty when nothing was selected.
pub content: String,
/// Structured tree; in this file only the tree format produces `Some`.
pub structure: Option<ContentTree>,
/// Statistics about the selection that was rendered.
pub metadata: ContentMetadata,
}
impl StructuredContent {
    /// Returns `true` when the rendered text is empty.
    ///
    /// Only `content` is inspected; `structure` and `metadata` are ignored.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        let rendered: &str = &self.content;
        rendered.is_empty()
    }

    /// Returns the length of the rendered text in bytes (not characters).
    #[must_use]
    pub fn len(&self) -> usize {
        let rendered: &str = &self.content;
        rendered.len()
    }
}
/// Renders selected content into one of the supported output formats.
#[derive(Debug)]
pub struct StructureBuilder {
// Format that `build` dispatches on.
format: OutputFormat,
// Set by `with_metadata`.
// NOTE(review): no code in this file reads this flag - confirm whether it is
// consumed elsewhere or is still unimplemented.
include_metadata: bool,
// When set, the Markdown and flat renderers add a score annotation.
include_scores: bool,
}
impl StructureBuilder {
    /// Creates a builder for `format` with metadata and score output disabled.
    #[must_use]
    pub fn new(format: OutputFormat) -> Self {
        Self {
            format,
            include_metadata: false,
            include_scores: false,
        }
    }

    /// Creates a builder from a configuration-level format and an explicit
    /// score flag; metadata output starts disabled.
    #[must_use]
    pub fn from_config(format: OutputFormatConfig, include_scores: bool) -> Self {
        Self {
            include_scores,
            ..Self::new(OutputFormat::from(format))
        }
    }

    /// Enables the metadata flag.
    ///
    /// NOTE(review): nothing in this impl reads `include_metadata`; `build`
    /// always computes and returns metadata. Confirm the intended effect.
    #[must_use]
    pub fn with_metadata(mut self) -> Self {
        self.include_metadata = true;
        self
    }

    /// Enables score annotations in the Markdown and flat renderers.
    #[must_use]
    pub fn with_scores(mut self) -> Self {
        self.include_scores = true;
        self
    }

    /// Renders `selected` in the builder's format and attaches summary metadata.
    ///
    /// An empty selection short-circuits to empty content, no structure and
    /// default (all-zero) metadata. Otherwise the metadata holds the token sum,
    /// item count, mean score and maximum depth of `selected`. `tree` is only
    /// consulted by the tree format.
    #[must_use]
    pub fn build(&self, selected: Vec<SelectedContent>, tree: &DocumentTree) -> StructuredContent {
        if selected.is_empty() {
            return StructuredContent {
                content: String::new(),
                structure: None,
                metadata: ContentMetadata::default(),
            };
        }

        // Statistics are gathered first because the renderers take `selected`
        // by value.
        let node_count = selected.len();
        let total_tokens: usize = selected.iter().map(|s| s.tokens).sum();
        let score_sum: f32 = selected.iter().map(|s| s.score).sum();
        let max_depth = selected.iter().map(|s| s.depth).max().unwrap_or(0);
        let metadata = ContentMetadata {
            total_tokens,
            node_count,
            // `node_count` is non-zero here: the empty case returned above.
            avg_score: score_sum / node_count as f32,
            max_depth,
        };

        let (content, structure) = match self.format {
            OutputFormat::Markdown => self.build_markdown(selected, tree),
            OutputFormat::Json => self.build_json(selected, tree),
            OutputFormat::Tree => self.build_tree_format(selected, tree),
            OutputFormat::Flat => self.build_flat(selected),
        };

        StructuredContent {
            content,
            structure,
            metadata,
        }
    }

    /// Renders each item as a Markdown section, separated by `---` rules.
    ///
    /// Items are ordered by ascending depth; the sort is stable, so items of
    /// equal depth keep their incoming order. The heading level is `depth + 1`,
    /// capped at 6. Never returns a structured tree.
    fn build_markdown(
        &self,
        selected: Vec<SelectedContent>,
        _tree: &DocumentTree,
    ) -> (String, Option<ContentTree>) {
        let mut sorted = selected;
        sorted.sort_by_key(|item| item.depth);

        let sections: Vec<String> = sorted
            .iter()
            .map(|item| {
                // Markdown defines heading levels 1 through 6 only.
                let heading = "#".repeat((item.depth + 1).min(6));
                let mut section = format!("{} {}", heading, item.title);
                if self.include_scores {
                    section.push_str(&format!(" *(score: {:.2})*", item.score));
                }
                section.push_str("\n\n");
                section.push_str(&item.content);
                if item.is_truncated() {
                    section.push_str("\n\n*[content truncated]*");
                }
                section
            })
            .collect();

        (sections.join("\n\n---\n\n"), None)
    }

    /// Renders the items as pretty-printed JSON of the form `{ "sections": [...] }`.
    ///
    /// Each section carries title, content, score, depth and a truncation flag.
    /// Scores are always emitted; `include_scores` is not consulted here. If
    /// serialization fails the content is an empty string. Never returns a
    /// structured tree.
    fn build_json(
        &self,
        selected: Vec<SelectedContent>,
        _tree: &DocumentTree,
    ) -> (String, Option<ContentTree>) {
        #[derive(Serialize)]
        struct JsonOutput<'a> {
            sections: Vec<JsonSection<'a>>,
        }

        #[derive(Serialize)]
        struct JsonSection<'a> {
            title: &'a str,
            content: &'a str,
            score: f32,
            depth: usize,
            truncated: bool,
        }

        let sections: Vec<_> = selected
            .iter()
            .map(|s| JsonSection {
                title: &s.title,
                content: &s.content,
                score: s.score,
                depth: s.depth,
                truncated: s.is_truncated(),
            })
            .collect();

        let output = JsonOutput { sections };
        let content = serde_json::to_string_pretty(&output).unwrap_or_default();
        (content, None)
    }

    /// Renders the items as an indented outline under a synthetic "Content"
    /// root and returns the matching [`ContentTree`].
    ///
    /// `total_nodes` in the returned tree counts the nodes below the synthetic
    /// root; the root itself is not included.
    fn build_tree_format(
        &self,
        selected: Vec<SelectedContent>,
        tree: &DocumentTree,
    ) -> (String, Option<ContentTree>) {
        use std::collections::HashMap;

        /// Converts `content` into a node and recursively attaches the items
        /// grouped under its node id.
        fn build_node(
            content: &SelectedContent,
            all_by_parent: &HashMap<Option<crate::document::NodeId>, Vec<&SelectedContent>>,
        ) -> ContentTreeNode {
            let mut node = ContentTreeNode::new(content.title.clone())
                .with_content(content.content.clone(), content.score);
            if let Some(children) = all_by_parent.get(&Some(content.node_id)) {
                for child in children {
                    node.add_child(build_node(child, all_by_parent));
                }
            }
            node
        }

        // Group items by the node id chosen as their parent; `None` marks the
        // items that hang directly off the synthetic root.
        let mut by_parent: HashMap<Option<crate::document::NodeId>, Vec<&SelectedContent>> =
            HashMap::new();
        for content in &selected {
            // NOTE(review): the parent is the FIRST selected item with a smaller
            // depth, not the nearest ancestor in `tree`. With A(0), B(1), C(2)
            // this puts C under A rather than B. Items whose node id is not
            // found in `tree` are always treated as top-level. Behaviour is
            // preserved here; confirm whether real ancestry should be used.
            let parent = tree
                .get(content.node_id)
                .and_then(|_| selected.iter().find(|other| other.depth < content.depth))
                .map(|other| other.node_id);
            by_parent.entry(parent).or_default().push(content);
        }

        let mut root = ContentTreeNode::new("Content".to_string());
        let mut node_count = 0;
        if let Some(top_level) = by_parent.get(&None) {
            for content in top_level {
                let node = build_node(content, &by_parent);
                node_count += count_nodes(&node);
                root.add_child(node);
            }
        }

        let content = render_tree(&root, 0);
        let tree_structure = ContentTree {
            root,
            total_nodes: node_count,
        };
        (content, Some(tree_structure))
    }

    /// Renders each item as `[title] content`, or as
    /// `[title] (score: x.xx) content` when scores are enabled, with a blank
    /// line between items. Never returns a structured tree.
    fn build_flat(&self, selected: Vec<SelectedContent>) -> (String, Option<ContentTree>) {
        let parts: Vec<String> = selected
            .iter()
            .map(|c| {
                if self.include_scores {
                    format!("[{}] (score: {:.2}) {}", c.title, c.score, c.content)
                } else {
                    format!("[{}] {}", c.title, c.content)
                }
            })
            .collect();
        (parts.join("\n\n"), None)
    }
}
impl Default for StructureBuilder {
fn default() -> Self {
Self::new(OutputFormat::default())
}
}
/// Counts `node` together with all of its descendants.
fn count_nodes(node: &ContentTreeNode) -> usize {
    // Start at 1 for `node` itself, then add the size of every subtree.
    node.children
        .iter()
        .fold(1, |total, child| total + count_nodes(child))
}
fn render_tree(node: &ContentTreeNode, depth: usize) -> String {
let indent = " ".repeat(depth);
let mut result = format!("{}├─ {} (score: {:.2})\n", indent, node.title, node.score);
if let Some(ref content) = node.content {
let preview = if content.len() > 100 {
format!("{}...", &content[..100])
} else {
content.clone()
};
result.push_str(&format!("{}│ {}\n", indent, preview.replace('\n', " ")));
}
for child in &node.children {
result.push_str(&render_tree(child, depth + 1));
}
result
}
// Unit tests for the structure builder and the content tree node helpers.
#[cfg(test)]
mod tests {
use super::*;
use crate::document::NodeId;
use indextree::Arena;
// Produces a `NodeId` by inserting a placeholder node into a fresh arena.
// The arena is local to this function, so the returned id is not tied to
// any `DocumentTree` used by the tests.
fn make_test_node_id() -> NodeId {
let mut arena = Arena::new();
let node = crate::document::TreeNode {
title: "Test".to_string(),
structure: String::new(),
content: String::new(),
summary: String::new(),
depth: 0,
start_index: 0,
end_index: 0,
start_page: None,
end_page: None,
node_id: None,
physical_index: None,
token_count: None,
references: Vec::new(),
};
NodeId(arena.new_node(node))
}
// Builds a non-truncated `SelectedContent` with a fixed token count of 50.
fn make_selected(title: &str, content: &str, score: f32, depth: usize) -> SelectedContent {
SelectedContent {
node_id: make_test_node_id(),
title: title.to_string(),
content: content.to_string(),
tokens: 50,
score,
depth,
truncation: None,
}
}
// Markdown output: depth 0 renders as `#`, depth 1 as `##`.
#[test]
fn test_markdown_builder() {
let builder = StructureBuilder::new(OutputFormat::Markdown);
let selected = vec![
make_selected("Section 1", "Content 1", 0.9, 0),
make_selected("Section 2", "Content 2", 0.8, 1),
];
let tree = DocumentTree::new("Test", "");
let result = builder.build(selected, &tree);
assert!(!result.is_empty());
assert!(result.content.contains("Section 1"));
assert!(result.content.contains("Section 2"));
assert!(result.content.contains("# Section 1"));
assert!(result.content.contains("## Section 2"));
}
// Flat output: the title is bracketed and followed by the content.
#[test]
fn test_flat_builder() {
let builder = StructureBuilder::new(OutputFormat::Flat);
let selected = vec![make_selected("Section 1", "Content 1", 0.9, 0)];
let tree = DocumentTree::new("Test", "");
let result = builder.build(selected, &tree);
assert!(result.content.contains("[Section 1]"));
assert!(result.content.contains("Content 1"));
}
// `with_scores` adds a two-decimal score annotation to Markdown headings.
#[test]
fn test_builder_with_scores() {
let builder = StructureBuilder::new(OutputFormat::Markdown).with_scores();
let selected = vec![make_selected("Section 1", "Content 1", 0.95, 0)];
let tree = DocumentTree::new("Test", "");
let result = builder.build(selected, &tree);
assert!(result.content.contains("score: 0.95"));
}
// An empty selection yields empty content and zeroed metadata.
#[test]
fn test_empty_selected() {
let builder = StructureBuilder::new(OutputFormat::Markdown);
let tree = DocumentTree::new("Test", "");
let result = builder.build(Vec::new(), &tree);
assert!(result.is_empty());
assert_eq!(result.metadata.node_count, 0);
}
// Node helpers: `with_content` sets the score and `add_child` appends.
#[test]
fn test_content_tree_node() {
let mut root =
ContentTreeNode::new("Root".to_string()).with_content("Root content".to_string(), 0.9);
let child = ContentTreeNode::new("Child".to_string())
.with_content("Child content".to_string(), 0.8);
root.add_child(child);
assert_eq!(root.children.len(), 1);
// Exact float comparison is fine here: the value is stored, not computed.
assert_eq!(root.score, 0.9);
}
}