use crate::Result;
#[cfg(feature = "semantic")]
use serde_json::{json, Value};
#[cfg(feature = "semantic")]
use super::chunking::DocumentChunk;
/// Descriptive information about a document that the exporters in this
/// module render into their headers.
#[derive(Debug, Clone)]
pub struct DocumentMetadata {
    /// Title of the document.
    pub title: String,
    /// Number of pages in the document.
    pub page_count: usize,
    /// Creation date kept as text; this module never parses it.
    pub created_at: Option<String>,
    /// Author name, when one is known.
    pub author: Option<String>,
}

impl Default for DocumentMetadata {
    /// Returns metadata titled `"Untitled Document"` with zero pages and
    /// neither a creation date nor an author.
    fn default() -> Self {
        DocumentMetadata {
            author: None,
            created_at: None,
            page_count: 0,
            title: String::from("Untitled Document"),
        }
    }
}
/// Switches that configure a `MarkdownExporter`.
#[derive(Debug, Clone)]
pub struct MarkdownOptions {
    /// Read by `MarkdownExporter::export` to decide whether the text gets
    /// the `# Document` heading.
    pub include_metadata: bool,
    /// Stored with the options; no exporter in this file consults it.
    pub include_page_numbers: bool,
}

impl Default for MarkdownOptions {
    /// Returns options with both switches turned on.
    fn default() -> Self {
        MarkdownOptions {
            include_page_numbers: true,
            include_metadata: true,
        }
    }
}
/// Converts extracted document text into Markdown.
///
/// Only [`MarkdownExporter::export`] reads the stored options; every other
/// exporter is an associated function that receives all of its input as
/// arguments.
#[derive(Debug, Clone)]
pub struct MarkdownExporter {
    options: MarkdownOptions,
}

impl Default for MarkdownExporter {
    /// Creates an exporter configured with `MarkdownOptions::default()`.
    fn default() -> Self {
        Self::new(MarkdownOptions::default())
    }
}

impl MarkdownExporter {
    /// Creates an exporter that uses `options`.
    pub fn new(options: MarkdownOptions) -> Self {
        Self { options }
    }

    /// Exports `text` according to the configured options.
    ///
    /// With `include_metadata` set the result is the same as
    /// [`MarkdownExporter::export_text`]; otherwise `text` is returned
    /// unchanged. `include_page_numbers` is not consulted.
    pub fn export(&self, text: &str) -> Result<String> {
        if self.options.include_metadata {
            Self::export_text(text)
        } else {
            Ok(text.to_string())
        }
    }

    /// Returns `text` preceded by a `# Document` heading and a blank line.
    pub fn export_text(text: &str) -> Result<String> {
        let mut output = String::with_capacity("# Document\n\n".len() + text.len());
        output.push_str("# Document\n\n");
        output.push_str(text);
        Ok(output)
    }

    /// Returns `text` preceded by a YAML front-matter block built from
    /// `metadata` and a level-one heading carrying the title.
    ///
    /// Front-matter values are quoted and escaped whenever a plain YAML
    /// scalar would not round-trip them (see `yaml_scalar`).
    pub fn export_with_metadata(text: &str, metadata: &DocumentMetadata) -> Result<String> {
        let mut output = Self::metadata_header(metadata);
        output.push_str(text);
        Ok(output)
    }

    /// Renders each `(page number, text)` pair under a `# Document` heading.
    ///
    /// Every page starts with a bold `**Page N**` marker and consecutive
    /// pages are separated by a horizontal rule.
    pub fn export_with_pages(page_texts: &[(usize, String)]) -> Result<String> {
        let mut output = String::from("# Document\n\n");
        Self::push_pages(&mut output, page_texts);
        Ok(output)
    }

    /// Combines the front matter and title heading of
    /// [`MarkdownExporter::export_with_metadata`] with the per-page layout
    /// of [`MarkdownExporter::export_with_pages`].
    pub fn export_with_metadata_and_pages(
        page_texts: &[(usize, String)],
        metadata: &DocumentMetadata,
    ) -> Result<String> {
        let mut output = Self::metadata_header(metadata);
        Self::push_pages(&mut output, page_texts);
        Ok(output)
    }

    /// Builds the YAML front-matter block followed by the `# <title>` heading.
    fn metadata_header(metadata: &DocumentMetadata) -> String {
        let mut header = String::from("---\n");
        header.push_str(&format!("title: {}\n", Self::yaml_scalar(&metadata.title)));
        header.push_str(&format!("pages: {}\n", metadata.page_count));
        if let Some(created) = &metadata.created_at {
            header.push_str(&format!("created: {}\n", Self::yaml_scalar(created)));
        }
        if let Some(author) = &metadata.author {
            header.push_str(&format!("author: {}\n", Self::yaml_scalar(author)));
        }
        header.push_str("---\n\n");
        // The heading is Markdown, not YAML, so the title is used verbatim.
        header.push_str(&format!("# {}\n\n", metadata.title));
        header
    }

    /// Appends the pages to `output`, placing a rule between neighbours.
    fn push_pages(output: &mut String, page_texts: &[(usize, String)]) {
        for (index, (page_num, text)) in page_texts.iter().enumerate() {
            if index > 0 {
                output.push_str("\n\n---\n\n");
            }
            output.push_str(&format!("**Page {}**\n\n", page_num));
            output.push_str(text);
        }
    }

    /// Reports whether `value` would be misread if written as a plain
    /// (unquoted) YAML scalar.
    fn yaml_needs_quoting(value: &str) -> bool {
        // Characters that start a different YAML construct when they come first.
        const LEADING_INDICATORS: &[char] = &[
            '-', '?', '[', ']', '{', '}', ',', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`',
        ];
        value.is_empty()
            || value.starts_with(LEADING_INDICATORS)
            // Plain scalars lose surrounding whitespace when parsed.
            || value.starts_with(char::is_whitespace)
            || value.ends_with(char::is_whitespace)
            // ':' and '#' can start a mapping or a comment; control characters
            // (newlines included) would break out of the single-line value.
            || value
                .chars()
                .any(|c| c == ':' || c == '#' || c.is_control())
    }

    /// Formats `value` as a single-line YAML scalar.
    ///
    /// Values that are safe as plain scalars are returned unchanged. All
    /// others are wrapped in double quotes with backslashes, quotes and
    /// control characters escaped, so the value cannot terminate the front
    /// matter or introduce additional keys.
    fn yaml_scalar(value: &str) -> String {
        if !Self::yaml_needs_quoting(value) {
            return value.to_string();
        }
        let mut quoted = String::with_capacity(value.len() + 2);
        quoted.push('"');
        for c in value.chars() {
            match c {
                '\\' => quoted.push_str("\\\\"),
                '"' => quoted.push_str("\\\""),
                '\n' => quoted.push_str("\\n"),
                '\r' => quoted.push_str("\\r"),
                '\t' => quoted.push_str("\\t"),
                // Remaining control characters are all below U+0100, so the
                // two-digit `\xNN` escape can represent them.
                c if c.is_control() => quoted.push_str(&format!("\\x{:02X}", c as u32)),
                c => quoted.push(c),
            }
        }
        quoted.push('"');
        quoted
    }
}
/// Switches that configure a `JsonExporter`.
#[cfg(feature = "semantic")]
#[derive(Debug, Clone)]
pub struct JsonOptions {
    /// Read by `JsonExporter::export` to choose between indented and
    /// compact output.
    pub pretty_print: bool,
    /// Stored with the options; no exporter in this file consults it.
    pub include_chunks: bool,
}

#[cfg(feature = "semantic")]
impl Default for JsonOptions {
    /// Returns options with pretty printing on and chunk inclusion off.
    fn default() -> Self {
        JsonOptions {
            include_chunks: false,
            pretty_print: true,
        }
    }
}
/// Serializes extracted document text, pages or chunks as JSON.
///
/// Only [`JsonExporter::export`] reads the stored options; the other
/// exporters are associated functions that always produce indented output.
#[cfg(feature = "semantic")]
#[derive(Debug, Clone)]
pub struct JsonExporter {
    options: JsonOptions,
}

#[cfg(feature = "semantic")]
impl Default for JsonExporter {
    /// Creates an exporter configured with `JsonOptions::default()`.
    fn default() -> Self {
        Self::new(JsonOptions::default())
    }
}

#[cfg(feature = "semantic")]
impl JsonExporter {
    /// Creates an exporter that uses `options`.
    pub fn new(options: JsonOptions) -> Self {
        Self { options }
    }

    /// Serializes `text` as `{"type": "document", "content": text}`,
    /// indented when `pretty_print` is set and compact otherwise.
    ///
    /// # Errors
    ///
    /// Returns `PdfError::SerializationError` if serialization fails.
    pub fn export(&self, text: &str) -> Result<String> {
        Self::serialize(&Self::plain_document(text), self.options.pretty_print)
    }

    /// Serializes `text` as an indented
    /// `{"type": "document", "content": text}` object.
    ///
    /// # Errors
    ///
    /// Returns `PdfError::SerializationError` if serialization fails.
    pub fn export_simple(text: &str) -> Result<String> {
        Self::serialize(&Self::plain_document(text), true)
    }

    /// Serializes `text` together with a `metadata` object.
    ///
    /// The metadata object always carries `title` and `page_count`;
    /// `created_at` and `author` are added only when they are present.
    ///
    /// # Errors
    ///
    /// Returns `PdfError::SerializationError` if serialization fails.
    pub fn export_with_metadata(text: &str, metadata: &DocumentMetadata) -> Result<String> {
        let mut meta_obj = json!({
            "title": metadata.title,
            "page_count": metadata.page_count
        });
        if let Some(created) = &metadata.created_at {
            meta_obj["created_at"] = json!(created);
        }
        if let Some(author) = &metadata.author {
            meta_obj["author"] = json!(author);
        }
        let doc = json!({
            "type": "document",
            "metadata": meta_obj,
            "content": text
        });
        Self::serialize(&doc, true)
    }

    /// Serializes the `(page number, text)` pairs as a `pages` array.
    ///
    /// `page_count` in the output is the number of entries in `page_texts`.
    ///
    /// # Errors
    ///
    /// Returns `PdfError::SerializationError` if serialization fails.
    pub fn export_pages(page_texts: &[(usize, String)]) -> Result<String> {
        let pages: Vec<Value> = page_texts
            .iter()
            .map(|(page_num, text)| {
                json!({
                    "page_number": page_num,
                    "content": text
                })
            })
            .collect();
        let doc = json!({
            "type": "document",
            "page_count": page_texts.len(),
            "pages": pages
        });
        Self::serialize(&doc, true)
    }

    /// Serializes `chunks` as a `chunked_document`.
    ///
    /// Each chunk contributes its id, content, token count, page numbers,
    /// chunk index and a nested `metadata` object holding the position,
    /// confidence and sentence-boundary flag.
    ///
    /// # Errors
    ///
    /// Returns `PdfError::SerializationError` if serialization fails.
    pub fn export_with_chunks(chunks: &[DocumentChunk]) -> Result<String> {
        let chunk_objects: Vec<Value> = chunks
            .iter()
            .map(|chunk| {
                json!({
                    "id": chunk.id,
                    "content": chunk.content,
                    "tokens": chunk.tokens,
                    "page_numbers": chunk.page_numbers,
                    "chunk_index": chunk.chunk_index,
                    "metadata": {
                        "position": {
                            "start_char": chunk.metadata.position.start_char,
                            "end_char": chunk.metadata.position.end_char,
                            "first_page": chunk.metadata.position.first_page,
                            "last_page": chunk.metadata.position.last_page
                        },
                        "confidence": chunk.metadata.confidence,
                        "sentence_boundary_respected": chunk.metadata.sentence_boundary_respected
                    }
                })
            })
            .collect();
        let doc = json!({
            "type": "chunked_document",
            "chunk_count": chunks.len(),
            "chunks": chunk_objects
        });
        Self::serialize(&doc, true)
    }

    /// Builds the `{"type": "document", "content": text}` value shared by
    /// `export` and `export_simple`.
    fn plain_document(text: &str) -> Value {
        json!({
            "type": "document",
            "content": text
        })
    }

    /// Renders `doc` as a JSON string, indented when `pretty` is true, and
    /// maps a serializer failure to `PdfError::SerializationError`.
    fn serialize(doc: &Value, pretty: bool) -> Result<String> {
        let rendered = if pretty {
            serde_json::to_string_pretty(doc)
        } else {
            serde_json::to_string(doc)
        };
        rendered.map_err(|e| crate::error::PdfError::SerializationError(e.to_string()))
    }
}
/// Plain-text exporter that introduces the document in natural-language
/// sentences before its content.
#[derive(Debug, Clone)]
pub struct ContextualFormat;

impl ContextualFormat {
    /// Returns `text` preceded by a `Document content:` line and a blank line.
    pub fn export_simple(text: &str) -> Result<String> {
        let mut output = String::from("Document content:\n\n");
        output.push_str(text);
        Ok(output)
    }

    /// Returns `text` preceded by a sentence describing `metadata` and a
    /// `Content:` label.
    ///
    /// The sentence always names the title. The page count is mentioned only
    /// when it is greater than zero, and the author and creation date only
    /// when they are present.
    pub fn export_with_metadata(text: &str, metadata: &DocumentMetadata) -> Result<String> {
        let mut output = Self::introduction(metadata);
        output.push_str(text);
        Ok(output)
    }

    /// Renders each `(page number, text)` pair as an `On page N:` line
    /// followed by the page text and a blank line, under a
    /// `Document content:` label.
    pub fn export_with_pages(page_texts: &[(usize, String)]) -> Result<String> {
        let mut output = String::from("Document content:\n\n");
        Self::push_pages(&mut output, page_texts);
        Ok(output)
    }

    /// Combines the introduction of
    /// [`ContextualFormat::export_with_metadata`] with the per-page layout
    /// of [`ContextualFormat::export_with_pages`].
    pub fn export_with_metadata_and_pages(
        page_texts: &[(usize, String)],
        metadata: &DocumentMetadata,
    ) -> Result<String> {
        let mut output = Self::introduction(metadata);
        Self::push_pages(&mut output, page_texts);
        Ok(output)
    }

    /// Builds the descriptive sentence plus the `Content:` label that both
    /// metadata-aware exporters start with.
    fn introduction(metadata: &DocumentMetadata) -> String {
        let mut intro = format!("This is a document titled \"{}\"", metadata.title);
        if metadata.page_count > 0 {
            // Singular "page" for exactly one page, plural otherwise.
            let plural = if metadata.page_count == 1 { "" } else { "s" };
            intro.push_str(&format!(" with {} page{}", metadata.page_count, plural));
        }
        if let Some(author) = &metadata.author {
            intro.push_str(&format!(", written by {}", author));
        }
        if let Some(created) = &metadata.created_at {
            intro.push_str(&format!(", created on {}", created));
        }
        intro.push_str(".\n\nContent:\n\n");
        intro
    }

    /// Appends every page to `output` as `On page N:`, the page text and a
    /// blank line.
    fn push_pages(output: &mut String, page_texts: &[(usize, String)]) {
        for (page_num, text) in page_texts {
            output.push_str(&format!("On page {}:\n", page_num));
            output.push_str(text);
            output.push_str("\n\n");
        }
    }
}
// Unit tests for the Markdown exporter, the JSON exporter (only built with
// the "semantic" feature) and the contextual plain-text exporter.
#[cfg(test)]
mod tests {
use super::*;
// export_text puts a "# Document" heading and a blank line before the text.
#[test]
fn test_basic_text_to_markdown() {
let text = "hello world";
let result = MarkdownExporter::export_text(text).unwrap();
assert!(result.contains("# Document"), "Should have document header");
assert!(
result.contains("hello world"),
"Should contain original text"
);
let lines: Vec<&str> = result.lines().collect();
assert_eq!(lines[0], "# Document");
assert_eq!(lines[1], "");
assert_eq!(lines[2], "hello world");
}
// Empty input still yields the heading followed by the blank line.
#[test]
fn test_empty_text() {
let result = MarkdownExporter::export_text("").unwrap();
assert!(
result.contains("# Document"),
"Should still have header for empty text"
);
assert_eq!(result, "# Document\n\n");
}
// Every line of a multi-line input is present in the output.
#[test]
fn test_multiline_text() {
let text = "First line\nSecond line\nThird line";
let result = MarkdownExporter::export_text(text).unwrap();
assert!(result.contains("First line"));
assert!(result.contains("Second line"));
assert!(result.contains("Third line"));
}
// Markdown-significant characters in the body are passed through unescaped.
#[test]
fn test_text_with_special_characters() {
let text = "Text with # hash and * asterisk";
let result = MarkdownExporter::export_text(text).unwrap();
assert!(result.contains("# hash"));
assert!(result.contains("* asterisk"));
}
// new() stores the options it is given.
#[test]
fn test_markdown_exporter_creation() {
let exporter = MarkdownExporter::new(MarkdownOptions {
include_metadata: true,
include_page_numbers: false,
});
assert!(exporter.options.include_metadata);
assert!(!exporter.options.include_page_numbers);
}
// default() enables both options.
#[test]
fn test_markdown_exporter_default() {
let exporter = MarkdownExporter::default();
assert!(exporter.options.include_metadata);
assert!(exporter.options.include_page_numbers);
}
// Full metadata produces YAML front matter, a title heading and the body.
#[test]
fn test_markdown_with_metadata() {
let metadata = DocumentMetadata {
title: "Test Document".to_string(),
page_count: 10,
created_at: Some("2025-10-13".to_string()),
author: Some("John Doe".to_string()),
};
let result = MarkdownExporter::export_with_metadata("Sample content", &metadata).unwrap();
assert!(result.starts_with("---\n"), "Should start with YAML marker");
assert!(result.contains("title: Test Document"));
assert!(result.contains("pages: 10"));
assert!(result.contains("created: 2025-10-13"));
assert!(result.contains("author: John Doe"));
assert!(result.contains("# Test Document"));
assert!(result.contains("Sample content"));
}
// A title containing ':' and '#' is wrapped in double quotes in the front matter.
#[test]
fn test_metadata_with_special_characters() {
let metadata = DocumentMetadata {
title: "Test: Document #1".to_string(),
page_count: 5,
created_at: None,
author: None,
};
let result = MarkdownExporter::export_with_metadata("Content", &metadata).unwrap();
assert!(result.contains("title: \"Test: Document #1\""));
}
// Absent optional fields produce no "created:" or "author:" entries.
#[test]
fn test_metadata_minimal() {
let metadata = DocumentMetadata {
title: "Simple".to_string(),
page_count: 1,
created_at: None,
author: None,
};
let result = MarkdownExporter::export_with_metadata("Text", &metadata).unwrap();
assert!(result.contains("title: Simple"));
assert!(result.contains("pages: 1"));
assert!(!result.contains("created:"));
assert!(!result.contains("author:"));
}
// Default metadata: "Untitled Document", zero pages, no date, no author.
#[test]
fn test_document_metadata_default() {
let metadata = DocumentMetadata::default();
assert_eq!(metadata.title, "Untitled Document");
assert_eq!(metadata.page_count, 0);
assert!(metadata.created_at.is_none());
assert!(metadata.author.is_none());
}
// Three pages give three page markers and two separators between them.
#[test]
fn test_multipage_markdown() {
let pages = vec![
(1, "Content of page 1".to_string()),
(2, "Content of page 2".to_string()),
(3, "Content of page 3".to_string()),
];
let result = MarkdownExporter::export_with_pages(&pages).unwrap();
assert!(result.starts_with("# Document\n\n"));
assert!(result.contains("**Page 1**"));
assert!(result.contains("**Page 2**"));
assert!(result.contains("**Page 3**"));
assert!(result.contains("Content of page 1"));
assert!(result.contains("Content of page 2"));
assert!(result.contains("Content of page 3"));
let separator_count = result.matches("\n---\n").count();
assert_eq!(separator_count, 2, "Should have 2 separators for 3 pages");
}
// Page markers appear in input order.
#[test]
fn test_page_numbers_correct() {
let pages = vec![(1, "First".to_string()), (2, "Second".to_string())];
let result = MarkdownExporter::export_with_pages(&pages).unwrap();
let page1_pos = result.find("**Page 1**").unwrap();
let page2_pos = result.find("**Page 2**").unwrap();
assert!(page1_pos < page2_pos, "Page 1 should appear before Page 2");
}
// A single page is rendered without any "---" separator.
#[test]
fn test_single_page_no_separator() {
let pages = vec![(1, "Single page content".to_string())];
let result = MarkdownExporter::export_with_pages(&pages).unwrap();
assert!(
!result.contains("---"),
"Single page should not have separator"
);
assert!(result.contains("**Page 1**"));
assert!(result.contains("Single page content"));
}
// An empty page list yields only the heading.
#[test]
fn test_empty_pages_list() {
let pages: Vec<(usize, String)> = vec![];
let result = MarkdownExporter::export_with_pages(&pages).unwrap();
assert_eq!(result, "# Document\n\n");
}
// Front matter, title heading and page markers all appear together.
#[test]
fn test_metadata_and_pages_combined() {
let metadata = DocumentMetadata {
title: "Test Document".to_string(),
page_count: 2,
created_at: Some("2025-10-13".to_string()),
author: Some("John Doe".to_string()),
};
let pages = vec![
(1, "Page one text".to_string()),
(2, "Page two text".to_string()),
];
let result = MarkdownExporter::export_with_metadata_and_pages(&pages, &metadata).unwrap();
assert!(result.starts_with("---\n"));
assert!(result.contains("title: Test Document"));
assert!(result.contains("pages: 2"));
assert!(result.contains("created: 2025-10-13"));
assert!(result.contains("author: John Doe"));
assert!(result.contains("# Test Document"));
assert!(result.contains("**Page 1**"));
assert!(result.contains("**Page 2**"));
assert!(result.contains("Page one text"));
assert!(result.contains("Page two text"));
}
// The separator between pages is a rule surrounded by blank lines.
#[test]
fn test_page_separator_format() {
let pages = vec![(1, "A".to_string()), (2, "B".to_string())];
let result = MarkdownExporter::export_with_pages(&pages).unwrap();
assert!(result.contains("\n\n---\n\n"));
}
// export_simple emits an object with "type" and "content".
#[cfg(feature = "semantic")]
#[test]
fn test_basic_json_export() {
let text = "Hello, world!";
let result = JsonExporter::export_simple(text).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
assert_eq!(parsed["type"], "document");
assert_eq!(parsed["content"], "Hello, world!");
}
// The export_simple output parses back into a JSON object.
#[cfg(feature = "semantic")]
#[test]
fn test_json_parsing() {
let text = "Sample content";
let json = JsonExporter::export_simple(text).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
assert!(parsed.is_object());
assert_eq!(parsed["type"], "document");
assert_eq!(parsed["content"], "Sample content");
}
// All four metadata fields appear under "metadata" next to the content.
#[cfg(feature = "semantic")]
#[test]
fn test_json_with_metadata() {
let metadata = DocumentMetadata {
title: "Test Doc".to_string(),
page_count: 10,
created_at: Some("2025-10-13".to_string()),
author: Some("Jane Doe".to_string()),
};
let json = JsonExporter::export_with_metadata("Content", &metadata).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
assert_eq!(parsed["metadata"]["title"], "Test Doc");
assert_eq!(parsed["metadata"]["page_count"], 10);
assert_eq!(parsed["metadata"]["created_at"], "2025-10-13");
assert_eq!(parsed["metadata"]["author"], "Jane Doe");
assert_eq!(parsed["content"], "Content");
}
// export_pages emits one entry per page, in order, plus the page count.
#[cfg(feature = "semantic")]
#[test]
fn test_json_pages_export() {
let pages = vec![
(1, "Page 1 text".to_string()),
(2, "Page 2 text".to_string()),
(3, "Page 3 text".to_string()),
];
let json = JsonExporter::export_pages(&pages).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
assert_eq!(parsed["type"], "document");
assert_eq!(parsed["page_count"], 3);
let pages_array = parsed["pages"].as_array().unwrap();
assert_eq!(pages_array.len(), 3);
assert_eq!(pages_array[0]["page_number"], 1);
assert_eq!(pages_array[0]["content"], "Page 1 text");
assert_eq!(pages_array[1]["page_number"], 2);
assert_eq!(pages_array[1]["content"], "Page 2 text");
assert_eq!(pages_array[2]["page_number"], 3);
assert_eq!(pages_array[2]["content"], "Page 3 text");
}
// With pretty_print off, the output contains no newline.
#[cfg(feature = "semantic")]
#[test]
fn test_json_exporter_options() {
let exporter = JsonExporter::new(JsonOptions {
pretty_print: false,
include_chunks: false,
});
let result = exporter.export("test").unwrap();
assert!(!result.contains('\n'));
}
// With pretty_print on, the output contains newlines and spaces.
#[cfg(feature = "semantic")]
#[test]
fn test_json_pretty_print() {
let exporter = JsonExporter::new(JsonOptions {
pretty_print: true,
include_chunks: false,
});
let result = exporter.export("test").unwrap();
assert!(result.contains('\n'));
assert!(result.contains(" ")); }
// An empty page list gives a zero page count and an empty "pages" array.
#[cfg(feature = "semantic")]
#[test]
fn test_json_empty_pages() {
let pages: Vec<(usize, String)> = vec![];
let json = JsonExporter::export_pages(&pages).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
assert_eq!(parsed["page_count"], 0);
assert_eq!(parsed["pages"].as_array().unwrap().len(), 0);
}
// Two chunks round-trip through export_with_chunks with every field intact.
#[cfg(feature = "semantic")]
#[test]
fn test_export_with_chunks_basic() {
use crate::ai::chunking::{ChunkMetadata, ChunkPosition};
let chunks = vec![
DocumentChunk {
id: "chunk_0".to_string(),
content: "First chunk content".to_string(),
tokens: 10,
page_numbers: vec![1],
chunk_index: 0,
metadata: ChunkMetadata {
position: ChunkPosition {
start_char: 0,
end_char: 100,
first_page: 1,
last_page: 1,
},
confidence: 1.0,
sentence_boundary_respected: true,
},
},
DocumentChunk {
id: "chunk_1".to_string(),
content: "Second chunk content".to_string(),
tokens: 12,
page_numbers: vec![1, 2],
chunk_index: 1,
metadata: ChunkMetadata {
position: ChunkPosition {
start_char: 90,
end_char: 200,
first_page: 1,
last_page: 2,
},
confidence: 0.95,
sentence_boundary_respected: false,
},
},
];
let json = JsonExporter::export_with_chunks(&chunks).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
assert_eq!(parsed["type"], "chunked_document");
assert_eq!(parsed["chunk_count"], 2);
let chunks_array = parsed["chunks"].as_array().unwrap();
assert_eq!(chunks_array.len(), 2);
assert_eq!(chunks_array[0]["id"], "chunk_0");
assert_eq!(chunks_array[0]["tokens"], 10);
assert_eq!(chunks_array[0]["content"], "First chunk content");
assert_eq!(chunks_array[0]["page_numbers"][0], 1);
assert_eq!(chunks_array[0]["chunk_index"], 0);
assert_eq!(chunks_array[0]["metadata"]["confidence"], 1.0);
assert_eq!(
chunks_array[0]["metadata"]["sentence_boundary_respected"],
true
);
assert_eq!(chunks_array[0]["metadata"]["position"]["start_char"], 0);
assert_eq!(chunks_array[0]["metadata"]["position"]["end_char"], 100);
assert_eq!(chunks_array[0]["metadata"]["position"]["first_page"], 1);
assert_eq!(chunks_array[0]["metadata"]["position"]["last_page"], 1);
assert_eq!(chunks_array[1]["id"], "chunk_1");
assert_eq!(chunks_array[1]["chunk_index"], 1);
assert_eq!(chunks_array[1]["tokens"], 12);
assert_eq!(chunks_array[1]["page_numbers"].as_array().unwrap().len(), 2);
// Confidence is compared with a tolerance rather than for exact equality.
let confidence = chunks_array[1]["metadata"]["confidence"].as_f64().unwrap();
assert!(
(confidence - 0.95).abs() < 0.01,
"Confidence should be approximately 0.95, got {}",
confidence
);
assert_eq!(
chunks_array[1]["metadata"]["sentence_boundary_respected"],
false
);
}
// An empty chunk list gives a zero chunk count and an empty "chunks" array.
#[cfg(feature = "semantic")]
#[test]
fn test_export_with_chunks_empty() {
let chunks: Vec<DocumentChunk> = vec![];
let json = JsonExporter::export_with_chunks(&chunks).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
assert_eq!(parsed["type"], "chunked_document");
assert_eq!(parsed["chunk_count"], 0);
assert_eq!(parsed["chunks"].as_array().unwrap().len(), 0);
}
// Position metadata and page numbers of a single chunk are exported unchanged.
#[cfg(feature = "semantic")]
#[test]
fn test_export_with_chunks_position_metadata() {
use crate::ai::chunking::{ChunkMetadata, ChunkPosition};
let chunk = DocumentChunk {
id: "test_chunk".to_string(),
content: "Test content for position tracking".to_string(),
tokens: 5,
page_numbers: vec![5, 6, 7],
chunk_index: 10,
metadata: ChunkMetadata {
position: ChunkPosition {
start_char: 1000,
end_char: 2000,
first_page: 5,
last_page: 7,
},
confidence: 0.85,
sentence_boundary_respected: false,
},
};
let json = JsonExporter::export_with_chunks(&[chunk]).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
assert_eq!(parsed["chunk_count"], 1);
let chunk_obj = &parsed["chunks"][0];
assert_eq!(chunk_obj["id"], "test_chunk");
assert_eq!(chunk_obj["tokens"], 5);
assert_eq!(chunk_obj["chunk_index"], 10);
assert_eq!(chunk_obj["content"], "Test content for position tracking");
let pages = chunk_obj["page_numbers"].as_array().unwrap();
assert_eq!(pages.len(), 3);
assert_eq!(pages[0], 5);
assert_eq!(pages[1], 6);
assert_eq!(pages[2], 7);
let pos = &chunk_obj["metadata"]["position"];
assert_eq!(pos["start_char"], 1000);
assert_eq!(pos["end_char"], 2000);
assert_eq!(pos["first_page"], 5);
assert_eq!(pos["last_page"], 7);
// Confidence is compared with a tolerance rather than for exact equality.
let confidence = chunk_obj["metadata"]["confidence"].as_f64().unwrap();
assert!(
(confidence - 0.85).abs() < 0.01,
"Confidence should be approximately 0.85, got {}",
confidence
);
assert_eq!(chunk_obj["metadata"]["sentence_boundary_respected"], false);
}
// A chunk spanning several pages keeps its page list and first/last page.
#[cfg(feature = "semantic")]
#[test]
fn test_export_with_chunks_multiple_pages() {
use crate::ai::chunking::{ChunkMetadata, ChunkPosition};
let chunk = DocumentChunk {
id: "multipage".to_string(),
content: "Content spanning pages".to_string(),
tokens: 20,
page_numbers: vec![2, 3, 4],
chunk_index: 0,
metadata: ChunkMetadata {
position: ChunkPosition {
start_char: 500,
end_char: 1500,
first_page: 2,
last_page: 4,
},
confidence: 1.0,
sentence_boundary_respected: true,
},
};
let json = JsonExporter::export_with_chunks(&[chunk]).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
let chunk_obj = &parsed["chunks"][0];
let pages = chunk_obj["page_numbers"].as_array().unwrap();
assert_eq!(pages.len(), 3);
assert_eq!(chunk_obj["metadata"]["position"]["first_page"], 2);
assert_eq!(chunk_obj["metadata"]["position"]["last_page"], 4);
}
// ContextualFormat::export_simple prefixes the text with "Document content:".
#[test]
fn test_contextual_simple() {
let text = "This is sample content.";
let result = ContextualFormat::export_simple(text).unwrap();
assert!(result.contains("Document content:"));
assert!(result.contains("This is sample content."));
assert_eq!(result, "Document content:\n\nThis is sample content.");
}
// Full metadata is rendered as one sentence with a plural page count.
#[test]
fn test_contextual_with_metadata_full() {
let metadata = DocumentMetadata {
title: "Annual Report".to_string(),
page_count: 25,
created_at: Some("2025-01-15".to_string()),
author: Some("Jane Smith".to_string()),
};
let result =
ContextualFormat::export_with_metadata("Report content here.", &metadata).unwrap();
assert!(result.contains("This is a document titled \"Annual Report\""));
assert!(result.contains("with 25 pages"));
assert!(result.contains("written by Jane Smith"));
assert!(result.contains("created on 2025-01-15"));
assert!(result.contains("Content:"));
assert!(result.contains("Report content here."));
assert!(!result.contains("page,"));
assert!(result.contains("pages,"));
}
// One page uses the singular form; absent author and date are not mentioned.
#[test]
fn test_contextual_with_metadata_minimal() {
let metadata = DocumentMetadata {
title: "Simple Doc".to_string(),
page_count: 1,
created_at: None,
author: None,
};
let result = ContextualFormat::export_with_metadata("Content", &metadata).unwrap();
assert!(result.contains("titled \"Simple Doc\""));
assert!(result.contains("with 1 page"));
assert!(!result.contains("pages")); assert!(!result.contains("written by"));
assert!(!result.contains("created on"));
}
// A page count of zero leaves the page clause out of the sentence.
#[test]
fn test_contextual_with_metadata_no_page_count() {
let metadata = DocumentMetadata {
title: "Test".to_string(),
page_count: 0,
created_at: None,
author: None,
};
let result = ContextualFormat::export_with_metadata("Text", &metadata).unwrap();
assert!(!result.contains("with"));
assert!(!result.contains("page"));
assert!(result.contains("This is a document titled \"Test\"."));
}
// Each page is introduced by an "On page N:" line directly above its text.
#[test]
fn test_contextual_with_pages() {
let pages = vec![
(1, "First page text".to_string()),
(2, "Second page text".to_string()),
(3, "Third page text".to_string()),
];
let result = ContextualFormat::export_with_pages(&pages).unwrap();
assert!(result.starts_with("Document content:\n\n"));
assert!(result.contains("On page 1:\nFirst page text"));
assert!(result.contains("On page 2:\nSecond page text"));
assert!(result.contains("On page 3:\nThird page text"));
}
// An empty page list yields only the "Document content:" label.
#[test]
fn test_contextual_with_pages_empty() {
let pages: Vec<(usize, String)> = vec![];
let result = ContextualFormat::export_with_pages(&pages).unwrap();
assert_eq!(result, "Document content:\n\n");
}
// A single page is rendered with its "On page 1:" line.
#[test]
fn test_contextual_with_pages_single() {
let pages = vec![(1, "Only page".to_string())];
let result = ContextualFormat::export_with_pages(&pages).unwrap();
assert!(result.contains("On page 1:\nOnly page"));
}
// The metadata sentence and the per-page sections appear together.
#[test]
fn test_contextual_with_metadata_and_pages() {
let metadata = DocumentMetadata {
title: "Technical Guide".to_string(),
page_count: 2,
created_at: Some("2025-10-13".to_string()),
author: Some("John Doe".to_string()),
};
let pages = vec![
(1, "Introduction text".to_string()),
(2, "Main content".to_string()),
];
let result = ContextualFormat::export_with_metadata_and_pages(&pages, &metadata).unwrap();
assert!(result.contains("titled \"Technical Guide\""));
assert!(result.contains("with 2 pages"));
assert!(result.contains("written by John Doe"));
assert!(result.contains("created on 2025-10-13"));
assert!(result.contains("Content:"));
assert!(result.contains("On page 1:\nIntroduction text"));
assert!(result.contains("On page 2:\nMain content"));
}
// Pins the exact wording and clause order of the introduction sentence.
#[test]
fn test_contextual_natural_language_flow() {
let metadata = DocumentMetadata {
title: "Report".to_string(),
page_count: 5,
created_at: Some("2025-01-01".to_string()),
author: Some("Alice".to_string()),
};
let result = ContextualFormat::export_with_metadata("Text", &metadata).unwrap();
assert!(result.starts_with("This is a document titled \"Report\" with 5 pages, written by Alice, created on 2025-01-01."));
}
// Empty input to export_simple yields only the label and blank line.
#[test]
fn test_contextual_empty_text() {
let result = ContextualFormat::export_simple("").unwrap();
assert_eq!(result, "Document content:\n\n");
}
}