use crate::block::{Block, BlockType};
use crate::document::Document;
use std::collections::HashMap;
/// Text statistics for a single block, as produced by `BlockStats::from_block`.
///
/// The `Default` value has every counter set to zero.
#[derive(Debug, Clone, Default)]
pub struct BlockStats {
/// Number of `char`s (not bytes) in the block's content.
pub char_count: usize,
/// Number of whitespace-separated words in the content.
pub word_count: usize,
/// Number of lines in the content; `from_block` reports at least 1.
pub line_count: usize,
/// Approximate sentence count derived from `.`, `!` and `?` terminators;
/// `from_block` reports at least 1.
pub sentence_count: usize,
/// Number of entries in the block's metadata collection.
pub metadata_count: usize,
/// Estimated reading time in seconds, based on 225 words per minute.
pub reading_time_secs: f64,
}
impl BlockStats {
    /// Computes text statistics for a single block.
    ///
    /// * `char_count` counts `char`s, not bytes.
    /// * `word_count` counts whitespace-separated tokens.
    /// * `line_count` and `sentence_count` are never reported as less than 1,
    ///   even when the content is empty.
    /// * `sentence_count` counts *runs* of `.`, `!` and `?`, so `"Wait..."`
    ///   and `"What?!"` each contribute a single sentence.
    /// * `metadata_count` is the number of entries in `block.metadata`.
    /// * `reading_time_secs` assumes a reading speed of 225 words per minute.
    pub fn from_block(block: &Block) -> Self {
        // Reading speed used to turn a word count into a time estimate.
        const WORDS_PER_MINUTE: f64 = 225.0;

        let content = &block.content;
        let char_count = content.chars().count();
        let word_count = content.split_whitespace().count();
        let line_count = content.lines().count().max(1);

        // Count each run of consecutive terminators once; counting every
        // terminator character would treat an ellipsis as three sentences.
        let mut terminator_runs = 0usize;
        let mut in_run = false;
        for c in content.chars() {
            let is_terminator = matches!(c, '.' | '!' | '?');
            if is_terminator && !in_run {
                terminator_runs += 1;
            }
            in_run = is_terminator;
        }
        // Unterminated text (e.g. a heading) still counts as one sentence.
        let sentence_count = terminator_runs.max(1);

        let metadata_count = block.metadata.len();
        let reading_time_secs = (word_count as f64 / WORDS_PER_MINUTE) * 60.0;

        Self {
            char_count,
            word_count,
            line_count,
            sentence_count,
            metadata_count,
            reading_time_secs,
        }
    }
}
/// Aggregate statistics for a whole document, as produced by
/// `DocumentStats::from_document`.
///
/// The `Default` value has every counter at zero and an empty type map.
#[derive(Debug, Clone, Default)]
pub struct DocumentStats {
/// Number of blocks in the document.
pub block_count: usize,
/// Sum of the per-block character counts.
pub total_chars: usize,
/// Sum of the per-block word counts.
pub total_words: usize,
/// Sum of the per-block line counts.
pub total_lines: usize,
/// Sum of the per-block sentence counts.
pub total_sentences: usize,
/// Number of blocks per type, keyed by the string each block's `type_name()` returns.
pub blocks_by_type: HashMap<String, usize>,
/// `total_words / block_count`; left at 0.0 when the document has no blocks.
pub avg_words_per_block: f64,
/// `total_chars / block_count`; left at 0.0 when the document has no blocks.
pub avg_chars_per_block: f64,
/// Estimated reading time in minutes, based on 225 words per minute.
pub reading_time_mins: f64,
/// Heuristic complexity score, capped at 100.
pub complexity_score: f64,
/// Length of the document title.
pub title_length: usize,
/// Number of entries in the document-level metadata collection.
pub metadata_count: usize,
}
impl DocumentStats {
/// Builds aggregate statistics for `doc`.
///
/// Every block is analysed with [`BlockStats::from_block`] and its counts are
/// summed into the totals; blocks are also tallied by `type_name()`.
/// The per-block averages stay at 0.0 for a document without blocks, and the
/// reading time assumes 225 words per minute.
pub fn from_document(doc: &Document) -> Self {
    // Reading speed used to turn a word count into a time estimate.
    const WORDS_PER_MINUTE: f64 = 225.0;

    let mut stats = Self {
        block_count: doc.blocks.len(),
        // Count characters, not UTF-8 bytes, so the title length agrees with
        // the character-based block statistics for non-ASCII titles.
        title_length: doc.title.chars().count(),
        metadata_count: doc.metadata.len(),
        ..Self::default()
    };

    for block in &doc.blocks {
        let block_stats = BlockStats::from_block(block);
        stats.total_chars += block_stats.char_count;
        stats.total_words += block_stats.word_count;
        stats.total_lines += block_stats.line_count;
        stats.total_sentences += block_stats.sentence_count;
        *stats
            .blocks_by_type
            .entry(block.type_name().to_string())
            .or_default() += 1;
    }

    // Guard against dividing by zero for an empty document.
    if stats.block_count > 0 {
        let blocks = stats.block_count as f64;
        stats.avg_words_per_block = stats.total_words as f64 / blocks;
        stats.avg_chars_per_block = stats.total_chars as f64 / blocks;
    }

    stats.reading_time_mins = stats.total_words as f64 / WORDS_PER_MINUTE;
    // The complexity score reads the totals, so it must be computed last.
    stats.complexity_score = Self::calculate_complexity(doc, &stats);
    stats
}
fn calculate_complexity(_doc: &Document, stats: &DocumentStats) -> f64 {
let mut score = 0.0;
let type_count = stats.blocks_by_type.len();
score += (type_count as f64 * 5.0).min(25.0);
if stats.total_sentences > 0 {
let avg_words_per_sentence = stats.total_words as f64 / stats.total_sentences as f64;
score += (avg_words_per_sentence * 1.5).min(25.0);
}
let code_blocks = stats.blocks_by_type.get("code").unwrap_or(&0);
score += (*code_blocks as f64 * 5.0).min(25.0);
score += (stats.block_count as f64 * 0.5).min(25.0);
score.min(100.0)
}
pub fn summary(&self) -> String {
format!(
"Document with {} blocks, {} words, ~{:.1} min read time",
self.block_count, self.total_words, self.reading_time_mins
)
}
/// Returns the block type name with the highest count, or `None` when no
/// blocks have been tallied.
///
/// When several types share the highest count, the lexicographically
/// smallest name is returned. `HashMap` iteration order is unspecified, so
/// without this tie-break the answer could differ from one run to the next.
pub fn most_common_block_type(&self) -> Option<&str> {
    self.blocks_by_type
        .iter()
        // Higher count wins; on equal counts the reversed name comparison
        // makes the alphabetically first name the maximum.
        .max_by(|a, b| a.1.cmp(b.1).then_with(|| b.0.cmp(a.0)))
        .map(|(name, _)| name.as_str())
}
}
/// Field-less type that groups the document analysis functions; all of its
/// functions are associated functions that take the document or block as an argument.
pub struct DocumentAnalyzer;
impl DocumentAnalyzer {
/// Computes aggregate statistics for `doc`.
///
/// Delegates to `DocumentStats::from_document`.
pub fn analyze(doc: &Document) -> DocumentStats {
DocumentStats::from_document(doc)
}
/// Computes text statistics for a single `block`.
///
/// Delegates to `BlockStats::from_block`.
pub fn analyze_block(block: &Block) -> BlockStats {
BlockStats::from_block(block)
}
/// Inspects `doc` and returns every structural issue found, in this order:
///
/// 1. `Warning` if the title is empty or whitespace-only.
/// 2. `Error` if the document has no blocks.
/// 3. Per block (numbered from 1):
///    * `Warning` if the content is empty or whitespace-only,
///    * `Info` if the content is longer than 10 000 characters,
///    * `Error` if the block is a header whose level is outside `1..=6`.
/// 4. `Info` if the document has blocks but none is a level-1 header.
///
/// An empty vector means no issues were detected.
pub fn find_issues(doc: &Document) -> Vec<DocumentIssue> {
    // Content longer than this many characters is reported as "very long".
    const MAX_BLOCK_CHARS: usize = 10_000;

    let mut issues = Vec::new();

    if doc.title.trim().is_empty() {
        issues.push(DocumentIssue::new(
            IssueSeverity::Warning,
            "Document has no title",
        ));
    }

    if doc.blocks.is_empty() {
        issues.push(DocumentIssue::new(
            IssueSeverity::Error,
            "Document has no blocks",
        ));
    }

    for (index, block) in doc.blocks.iter().enumerate() {
        // Messages use 1-based block numbers.
        let position = index + 1;

        if block.content.trim().is_empty() {
            issues.push(DocumentIssue::new(
                IssueSeverity::Warning,
                &format!("Block {} has empty content", position),
            ));
        }

        // The message reports "chars", so measure characters rather than
        // UTF-8 bytes; otherwise non-ASCII text is flagged too early.
        let char_len = block.content.chars().count();
        if char_len > MAX_BLOCK_CHARS {
            issues.push(DocumentIssue::new(
                IssueSeverity::Info,
                &format!(
                    "Block {} has very long content ({} chars)",
                    position, char_len
                ),
            ));
        }

        if let BlockType::Header { level } = &block.block_type {
            // Valid header levels are 1 through 6; level 0 is as invalid as 7.
            if !(1..=6).contains(level) {
                issues.push(DocumentIssue::new(
                    IssueSeverity::Error,
                    &format!("Block {} has invalid header level: {}", position, level),
                ));
            }
        }
    }

    let has_h1 = doc
        .blocks
        .iter()
        .any(|b| matches!(b.block_type, BlockType::Header { level: 1 }));
    // An empty document already got the "no blocks" error; don't pile on.
    if !has_h1 && !doc.blocks.is_empty() {
        issues.push(DocumentIssue::new(
            IssueSeverity::Info,
            "Document has no H1 header",
        ));
    }

    issues
}
/// Computes a reading-ease score for `doc`, clamped to `0.0..=100.0`.
///
/// The score is `206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)`,
/// where words and sentences come from `DocumentStats::from_document` and
/// syllables are estimated per block with `count_syllables`.
/// Returns 0.0 when the document has no words or no sentences.
pub fn readability_score(doc: &Document) -> f64 {
    let stats = DocumentStats::from_document(doc);
    let (word_total, sentence_total) = (stats.total_words, stats.total_sentences);
    if word_total == 0 || sentence_total == 0 {
        return 0.0;
    }

    let mut syllable_total: usize = 0;
    for block in &doc.blocks {
        syllable_total += Self::count_syllables(&block.content);
    }

    let words = word_total as f64;
    let words_per_sentence = words / sentence_total as f64;
    let syllables_per_word = syllable_total as f64 / words;

    let raw = 206.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word;
    raw.clamp(0.0, 100.0)
}
/// Estimates the number of syllables in `text`.
///
/// The text is lower-cased and every maximal run of the letters
/// `a`, `e`, `i`, `o`, `u`, `y` counts as one syllable. The result is never
/// less than 1, even for empty or vowel-free text.
fn count_syllables(text: &str) -> usize {
    let lowered = text.to_lowercase();
    // State: (vowel groups seen so far, whether the previous char was a vowel).
    let (groups, _) = lowered
        .chars()
        .fold((0usize, false), |(groups, in_group), ch| {
            let vowel = matches!(ch, 'a' | 'e' | 'i' | 'o' | 'u' | 'y');
            // A new group starts only on a vowel that follows a non-vowel.
            (groups + usize::from(vowel && !in_group), vowel)
        });
    groups.max(1)
}
/// Maps a reading-ease `score` to a human-readable difficulty label.
///
/// The fractional part of the score is truncated toward zero before the
/// lookup, so `89.9` is classified as `"Easy (6th grade)"`.
/// Returns `"Unknown"` for `NaN` and for scores whose truncated value lies
/// outside `0..=100`.
pub fn reading_level(score: f64) -> &'static str {
    // `NaN as i32` evaluates to 0, which would mislabel an undefined score
    // as "Very Difficult (Graduate)"; report it as unknown instead.
    if score.is_nan() {
        return "Unknown";
    }
    match score as i32 {
        90..=100 => "Very Easy (5th grade)",
        80..=89 => "Easy (6th grade)",
        70..=79 => "Fairly Easy (7th grade)",
        60..=69 => "Standard (8th-9th grade)",
        50..=59 => "Fairly Difficult (10th-12th grade)",
        30..=49 => "Difficult (College)",
        0..=29 => "Very Difficult (Graduate)",
        _ => "Unknown",
    }
}
}
/// Severity attached to a `DocumentIssue`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IssueSeverity {
/// Informational note; `find_issues` uses it for very long blocks and a missing H1 header.
Info,
/// Likely problem; `find_issues` uses it for a missing title and empty block content.
Warning,
/// Definite problem; `find_issues` uses it for a document without blocks and invalid header levels.
Error,
}
/// A single finding reported by `DocumentAnalyzer::find_issues`.
#[derive(Debug, Clone)]
pub struct DocumentIssue {
/// How serious the finding is.
pub severity: IssueSeverity,
/// Human-readable description of the finding.
pub message: String,
}
impl DocumentIssue {
    /// Creates an issue with the given `severity`, storing an owned copy of `message`.
    pub fn new(severity: IssueSeverity, message: &str) -> Self {
        let message = String::from(message);
        Self { severity, message }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a titled document holding an H1 header, a text paragraph and
    /// a Rust code block, in that order.
    fn create_test_doc() -> Document {
        let mut doc = Document::with_title("Test Document".to_string());
        let fixture = vec![
            (BlockType::Header { level: 1 }, "Introduction"),
            (
                BlockType::Text,
                "This is a sample paragraph with multiple words.",
            ),
            (
                BlockType::Code {
                    language: Some("rust".to_string()),
                },
                "fn main() {}",
            ),
        ];
        for (block_type, content) in fixture {
            doc.add_block(Block::new(block_type, content.to_string()));
        }
        doc
    }

    /// True when analysing `doc` yields an issue whose message contains `needle`.
    fn reports_issue(doc: &Document, needle: &str) -> bool {
        DocumentAnalyzer::find_issues(doc)
            .iter()
            .any(|issue| issue.message.contains(needle))
    }

    #[test]
    fn test_block_stats() {
        let text = Block::new(BlockType::Text, "Hello world. How are you?".to_string());
        let measured = BlockStats::from_block(&text);

        assert_eq!(measured.word_count, 5);
        assert!(measured.char_count > 0);
        assert!(measured.sentence_count >= 1);
    }

    #[test]
    fn test_document_stats() {
        let stats = DocumentStats::from_document(&create_test_doc());

        assert_eq!(stats.block_count, 3);
        assert!(stats.total_words > 0);
        // The fixture contains exactly one block of each of these types.
        for kind in &["header", "text", "code"] {
            assert_eq!(stats.blocks_by_type.get(*kind), Some(&1));
        }
    }

    #[test]
    fn test_document_stats_summary() {
        let rendered = DocumentStats::from_document(&create_test_doc()).summary();

        assert!(rendered.contains("3 blocks"));
        assert!(rendered.contains("words"));
    }

    #[test]
    fn test_most_common_block_type() {
        let mut doc = Document::new();
        for content in &["text 1", "text 2"] {
            doc.add_block(Block::new(BlockType::Text, content.to_string()));
        }
        doc.add_block(Block::new(
            BlockType::Header { level: 1 },
            "header".to_string(),
        ));

        let stats = DocumentStats::from_document(&doc);
        assert_eq!(stats.most_common_block_type(), Some("text"));
    }

    #[test]
    fn test_find_issues_empty_title() {
        assert!(reports_issue(&Document::new(), "no title"));
    }

    #[test]
    fn test_find_issues_empty_blocks() {
        let titled = Document::with_title("Test".to_string());
        assert!(reports_issue(&titled, "no blocks"));
    }

    #[test]
    fn test_find_issues_empty_content() {
        let mut titled = Document::with_title("Test".to_string());
        titled.add_block(Block::new(BlockType::Text, " ".to_string()));
        assert!(reports_issue(&titled, "empty content"));
    }

    #[test]
    fn test_readability_score() {
        let mut doc = Document::with_title("Test".to_string());
        doc.add_block(Block::new(
            BlockType::Text,
            "The cat sat on the mat. The dog ran in the park.".to_string(),
        ));

        let score = DocumentAnalyzer::readability_score(&doc);
        assert!(score > 0.0);
        assert!(score <= 100.0);
    }

    #[test]
    fn test_reading_level() {
        let expectations = [
            (95.0, "Very Easy (5th grade)"),
            (65.0, "Standard (8th-9th grade)"),
            (25.0, "Very Difficult (Graduate)"),
        ];
        for (score, label) in expectations.iter() {
            assert_eq!(DocumentAnalyzer::reading_level(*score), *label);
        }
    }

    #[test]
    fn test_complexity_score() {
        let complexity = DocumentStats::from_document(&create_test_doc()).complexity_score;

        assert!(complexity >= 0.0);
        assert!(complexity <= 100.0);
    }

    #[test]
    fn test_syllable_counting() {
        assert!(DocumentAnalyzer::count_syllables("hello") >= 2);
        assert!(DocumentAnalyzer::count_syllables("beautiful") >= 3);
    }

    #[test]
    fn test_empty_document_stats() {
        let stats = DocumentStats::from_document(&Document::new());

        assert_eq!(stats.block_count, 0);
        assert_eq!(stats.total_words, 0);
        assert_eq!(stats.reading_time_mins, 0.0);
    }
}