use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use turbovault_core::prelude::*;
use turbovault_parser::to_plain_text;
use turbovault_vault::VaultManager;
/// Entry point for note-level and vault-level quality scoring.
///
/// The only state is a shared [`VaultManager`] handle, so cloning this value
/// just clones the `Arc`.
#[derive(Clone)]
pub struct QualityTools {
/// Shared vault manager used to scan the vault, parse notes, and look up links.
pub manager: Arc<VaultManager>,
}
/// Complete quality assessment of a single note.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityScore {
/// Note path, stored exactly as it was passed to `evaluate_note`.
pub path: String,
/// Weighted combination of the four dimension scores, clamped to 0–100.
pub overall_score: u8,
/// Readability metrics computed from the note's plain-text rendering.
pub readability: ReadabilityScore,
/// Structural metrics (headings, frontmatter, tags, links).
pub structure: StructureScore,
/// Completeness metrics (length, link density, metadata, connectivity).
pub completeness: CompletenessScore,
/// Freshness metrics derived from modification timestamps.
pub staleness: StalenessScore,
/// Improvement suggestions for this note; at most five are kept.
pub recommendations: Vec<String>,
}
/// Readability metrics for a note's plain text.
///
/// All fields are zero when the text is empty or whitespace-only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadabilityScore {
/// Readability rating from 0 to 100, derived from the Flesch-Kincaid grade
/// and reduced by 15 points when vocabulary diversity is below 0.3.
pub score: u8,
/// Flesch-Kincaid grade level, clamped to [-3, 30] and rounded to two decimals.
pub flesch_kincaid_grade: f64,
/// Mean number of words per sentence (sentences are split on `.`, `!`, `?`),
/// rounded to two decimals.
pub avg_sentence_length: f64,
/// Mean length of the whitespace-separated words, rounded to two decimals.
pub avg_word_length: f64,
/// Ratio of distinct lowercased words to total words, rounded to three decimals.
pub vocabulary_diversity: f64,
}
/// Structural metrics for a note.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StructureScore {
/// Sum of the points awarded for title, headings, heading hierarchy,
/// frontmatter, tags and links (maximum 100).
pub score: u8,
/// Whether the note contains at least one level-1 heading.
pub has_title: bool,
/// Total number of headings in the note.
pub heading_count: usize,
/// Whether the note has frontmatter.
pub has_frontmatter: bool,
/// Number of tags on the note.
pub tag_count: usize,
/// Whether `link_count` is greater than zero.
pub has_links: bool,
/// Number of links found in the note.
pub link_count: usize,
/// Result of `check_heading_hierarchy` for the note's headings.
pub heading_hierarchy_valid: bool,
}
/// Completeness metrics for a note.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompletenessScore {
/// Sum of the points awarded for length, link density, frontmatter, tags
/// and link connectivity (maximum 100).
pub score: u8,
/// Number of whitespace-separated tokens in the note's content.
pub word_count: usize,
/// Links per 100 words, rounded to two decimals (0.0 when the note has no words).
pub link_density: f64,
/// Whether the note has at least one tag.
pub has_tags: bool,
/// Whether the note has frontmatter.
pub has_frontmatter: bool,
/// Number of entries in the frontmatter data (0 without frontmatter).
pub frontmatter_keys: usize,
/// Whether the note links to anything.
pub has_outgoing_links: bool,
/// Whether the backlink lookup returned at least one entry; `false` when
/// the lookup fails.
pub has_incoming_links: bool,
}
/// Freshness metrics for a note.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StalenessScore {
/// Freshness rating: 100 for a note modified less than a day ago, stepping
/// down to 10 for notes older than 365 days, minus 2 points per newer
/// linked note (penalty capped at 40).
pub score: u8,
/// Whole days since the note's modification timestamp, never negative.
pub days_since_modified: u64,
/// Whole days since the note's creation timestamp, never negative.
pub days_since_created: u64,
/// Number of forward-linked notes whose modification timestamp is later
/// than this note's.
pub linked_notes_newer: usize,
}
/// Histogram of overall note scores across the vault.
///
/// Buckets as filled by `vault_quality_report`: `poor` is 0–20, `below_avg`
/// is 21–40, `average` is 41–60, `good` is 61–80 and `excellent` is
/// everything above 80.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoreDistribution {
pub poor: usize, pub below_avg: usize, pub average: usize, pub good: usize, pub excellent: usize, }
/// Mean score of each quality dimension across all evaluated notes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DimensionAverages {
/// Mean readability score.
pub readability: f64,
/// Mean structure score.
pub structure: f64,
/// Mean completeness score.
pub completeness: f64,
/// Mean staleness (freshness) score.
pub staleness: f64,
}
/// Vault-wide quality summary produced by `vault_quality_report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VaultQualityReport {
/// Number of notes that were evaluated successfully (failed notes are skipped).
pub total_notes: usize,
/// Mean overall score across the evaluated notes (0.0 for an empty vault).
pub average_score: f64,
/// Count of notes per overall-score bucket.
pub score_distribution: ScoreDistribution,
/// Up to `bottom_n` notes with the lowest overall score, worst first.
pub lowest_quality: Vec<QualityScore>,
/// Up to `bottom_n` notes with the highest overall score, best first.
pub highest_quality: Vec<QualityScore>,
/// Mean score per quality dimension.
pub dimension_averages: DimensionAverages,
/// Vault-level improvement suggestions.
pub recommendations: Vec<String>,
}
// Relative contribution of each dimension to the overall note score.
// The four weights sum to 1.0.
/// Share of the overall score contributed by readability.
const READABILITY_WEIGHT: f64 = 0.20;
/// Share of the overall score contributed by structure.
const STRUCTURE_WEIGHT: f64 = 0.30;
/// Share of the overall score contributed by completeness.
const COMPLETENESS_WEIGHT: f64 = 0.30;
/// Share of the overall score contributed by staleness (freshness).
const STALENESS_WEIGHT: f64 = 0.20;
impl QualityTools {
    /// Creates a quality toolset backed by the given shared vault manager.
    pub fn new(manager: Arc<VaultManager>) -> Self {
        Self { manager }
    }

    /// Turns a path returned by a vault scan into the string handed to
    /// [`Self::evaluate_note`].
    ///
    /// The vault root is stripped when the path lies under it; otherwise the
    /// path is used unchanged. Non-UTF-8 components are converted lossily.
    fn relative_note_path(&self, file_path: &Path) -> String {
        file_path
            .strip_prefix(self.manager.vault_path())
            .unwrap_or(file_path)
            .to_string_lossy()
            .into_owned()
    }

    /// Arithmetic mean of one `u8` score picked from every entry of `scores`.
    ///
    /// Callers must pass a non-empty slice; an empty slice yields `NaN`.
    fn mean_score(scores: &[QualityScore], pick: impl Fn(&QualityScore) -> u8) -> f64 {
        scores.iter().map(|s| f64::from(pick(s))).sum::<f64>() / scores.len() as f64
    }

    /// Scores a single note on readability, structure, completeness and
    /// staleness, and combines the four into a weighted overall score.
    ///
    /// `path` is passed to the vault manager as given and echoed back in the
    /// returned [`QualityScore`].
    ///
    /// # Errors
    ///
    /// Returns the vault manager's error when the note cannot be parsed.
    pub async fn evaluate_note(&self, path: &str) -> Result<QualityScore> {
        let file_path = PathBuf::from(path);
        let vault_file = self.manager.parse_file(&file_path).await?;

        // Readability is measured on rendered text so markup does not count as words.
        let plain_content = to_plain_text(&vault_file.content);
        let readability = compute_readability(&plain_content);
        let structure = compute_structure(&vault_file);
        let completeness = compute_completeness(&vault_file, &self.manager, &file_path).await;
        let staleness = compute_staleness(&vault_file, &self.manager, &file_path).await;

        let overall_score = weighted_score(
            readability.score,
            structure.score,
            completeness.score,
            staleness.score,
        );
        let recommendations =
            generate_recommendations(&readability, &structure, &completeness, &staleness);

        Ok(QualityScore {
            path: path.to_string(),
            overall_score,
            readability,
            structure,
            completeness,
            staleness,
            recommendations,
        })
    }

    /// Evaluates every note found by a vault scan and summarises the results.
    ///
    /// `bottom_n` caps both the `lowest_quality` and the `highest_quality`
    /// lists. Notes that fail to evaluate are logged at warn level and left
    /// out of the report.
    ///
    /// # Errors
    ///
    /// Returns the vault manager's error when the vault scan itself fails.
    pub async fn vault_quality_report(&self, bottom_n: usize) -> Result<VaultQualityReport> {
        let files = self.manager.scan_vault().await?;

        let mut scores: Vec<QualityScore> = Vec::with_capacity(files.len());
        for file_path in &files {
            let rel_path = self.relative_note_path(file_path);
            match self.evaluate_note(&rel_path).await {
                Ok(score) => scores.push(score),
                Err(e) => {
                    log::warn!("Failed to evaluate {}: {}", rel_path, e);
                }
            }
        }

        let mut dist = ScoreDistribution {
            poor: 0,
            below_avg: 0,
            average: 0,
            good: 0,
            excellent: 0,
        };

        let total_notes = scores.len();
        if total_notes == 0 {
            // Nothing to average: return an all-zero report instead of dividing by zero.
            return Ok(VaultQualityReport {
                total_notes: 0,
                average_score: 0.0,
                score_distribution: dist,
                lowest_quality: Vec::new(),
                highest_quality: Vec::new(),
                dimension_averages: DimensionAverages {
                    readability: 0.0,
                    structure: 0.0,
                    completeness: 0.0,
                    staleness: 0.0,
                },
                recommendations: Vec::new(),
            });
        }

        let average_score = Self::mean_score(&scores, |s| s.overall_score);

        for s in &scores {
            match s.overall_score {
                0..=20 => dist.poor += 1,
                21..=40 => dist.below_avg += 1,
                41..=60 => dist.average += 1,
                61..=80 => dist.good += 1,
                _ => dist.excellent += 1,
            }
        }

        let dim_avg = DimensionAverages {
            readability: Self::mean_score(&scores, |s| s.readability.score),
            structure: Self::mean_score(&scores, |s| s.structure.score),
            completeness: Self::mean_score(&scores, |s| s.completeness.score),
            staleness: Self::mean_score(&scores, |s| s.staleness.score),
        };

        // Ascending stable sort: the front holds the worst notes, the back the best.
        scores.sort_by_key(|s| s.overall_score);
        let lowest: Vec<QualityScore> = scores.iter().take(bottom_n).cloned().collect();
        let highest: Vec<QualityScore> = scores.iter().rev().take(bottom_n).cloned().collect();

        let mut recommendations = Vec::new();
        if dim_avg.readability < 50.0 {
            recommendations.push("Vault readability is below average — consider breaking up long sentences and using simpler vocabulary".to_string());
        }
        if dim_avg.structure < 50.0 {
            recommendations.push(
                "Many notes lack proper structure — add headings, frontmatter, and tags"
                    .to_string(),
            );
        }
        if dim_avg.completeness < 50.0 {
            recommendations
                .push("Notes are sparse — add more content, links, and metadata".to_string());
        }
        if dim_avg.staleness < 50.0 {
            recommendations
                .push("Many notes are stale — review and update outdated content".to_string());
        }
        // Only flag the poor bucket when it holds more than a quarter of the vault.
        if dist.poor > total_notes / 4 {
            recommendations.push(format!(
                "{} notes scored below 20 — consider reviewing or archiving them",
                dist.poor
            ));
        }

        Ok(VaultQualityReport {
            total_notes,
            average_score,
            score_distribution: dist,
            lowest_quality: lowest,
            highest_quality: highest,
            dimension_averages: dim_avg,
            recommendations,
        })
    }

    /// Returns the notes that have not been modified for at least
    /// `threshold_days`, most stale first, truncated to `limit` entries.
    ///
    /// Notes that fail to evaluate are logged at warn level and skipped, the
    /// same way [`Self::vault_quality_report`] treats them.
    ///
    /// # Errors
    ///
    /// Returns the vault manager's error when the vault scan itself fails.
    pub async fn find_stale_notes(
        &self,
        threshold_days: u64,
        limit: usize,
    ) -> Result<Vec<QualityScore>> {
        let files = self.manager.scan_vault().await?;

        let mut stale: Vec<QualityScore> = Vec::new();
        for file_path in &files {
            let rel_path = self.relative_note_path(file_path);
            match self.evaluate_note(&rel_path).await {
                Ok(score) if score.staleness.days_since_modified >= threshold_days => {
                    stale.push(score);
                }
                Ok(_) => {}
                Err(e) => {
                    log::warn!("Failed to evaluate {}: {}", rel_path, e);
                }
            }
        }

        // Stable sort, oldest modification first.
        stale.sort_by_key(|s| std::cmp::Reverse(s.staleness.days_since_modified));
        stale.truncate(limit);
        Ok(stale)
    }
}
/// Returns the current wall-clock time as fractional seconds since the Unix epoch.
///
/// Yields `0.0` if the system clock reports a time before the epoch.
fn current_timestamp() -> f64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs_f64(),
        // A zero duration is what the default `Duration` would give.
        Err(_) => 0.0,
    }
}
/// Combines the four dimension scores into one overall score.
///
/// Each score is multiplied by its dimension weight, the products are summed,
/// and the total is rounded and clamped to the 0–100 range.
fn weighted_score(readability: u8, structure: u8, completeness: u8, staleness: u8) -> u8 {
    let weighted_parts = [
        (readability, READABILITY_WEIGHT),
        (structure, STRUCTURE_WEIGHT),
        (completeness, COMPLETENESS_WEIGHT),
        (staleness, STALENESS_WEIGHT),
    ];
    let total = weighted_parts
        .iter()
        .fold(0.0_f64, |acc, &(value, weight)| acc + f64::from(value) * weight);
    total.round().clamp(0.0, 100.0) as u8
}
/// Estimates the number of syllables in `word` with a vowel-group heuristic.
///
/// Each run of consecutive vowels (`a e i o u y`, case-insensitive) counts as
/// one syllable, a trailing `e` is discounted when there is more than one
/// group, and the result is never less than 1. Words of at most two bytes are
/// always treated as one syllable.
fn count_syllables(word: &str) -> usize {
    let lowered = word.to_lowercase();
    if lowered.len() <= 2 {
        return 1;
    }

    let mut groups = 0usize;
    let mut in_vowel_run = false;
    for ch in lowered.chars() {
        let vowel = matches!(ch, 'a' | 'e' | 'i' | 'o' | 'u' | 'y');
        // Count only the first vowel of each consecutive run.
        if vowel && !in_vowel_run {
            groups += 1;
        }
        in_vowel_run = vowel;
    }

    // A trailing "e" is treated as silent unless it is the only vowel group.
    if groups > 1 && lowered.ends_with('e') {
        groups -= 1;
    }
    groups.max(1)
}
/// Computes readability metrics for the plain-text rendering of a note.
///
/// Sentences are delimited by `.`, `!` and `?`; words are whitespace-separated
/// tokens. The score is derived from the Flesch-Kincaid grade level (clamped
/// to [-3, 30]) and reduced by 15 points when fewer than 30% of the words are
/// distinct. Empty or whitespace-only input yields an all-zero result.
///
/// `avg_word_length` is measured in characters rather than bytes, so text
/// containing non-ASCII letters is not reported as having longer words.
fn compute_readability(plain_text: &str) -> ReadabilityScore {
    if plain_text.trim().is_empty() {
        return ReadabilityScore {
            score: 0,
            flesch_kincaid_grade: 0.0,
            avg_sentence_length: 0.0,
            avg_word_length: 0.0,
            vocabulary_diversity: 0.0,
        };
    }

    // `.max(1)` guards the divisions below for text without sentence punctuation.
    let sentence_count = plain_text
        .split(['.', '!', '?'])
        .filter(|s| !s.trim().is_empty())
        .count()
        .max(1);
    let words: Vec<&str> = plain_text.split_whitespace().collect();
    let word_count = words.len().max(1);

    let total_syllables: usize = words.iter().map(|w| count_syllables(w)).sum();
    // Count characters, not bytes: `str::len` would over-count multi-byte letters.
    let total_chars: usize = words.iter().map(|w| w.chars().count()).sum();

    let avg_sentence_length = word_count as f64 / sentence_count as f64;
    let avg_word_length = total_chars as f64 / word_count as f64;

    let unique_words: HashSet<String> = words.iter().map(|w| w.to_lowercase()).collect();
    let vocabulary_diversity = unique_words.len() as f64 / word_count as f64;

    let fk_grade =
        0.39 * avg_sentence_length + 11.8 * (total_syllables as f64 / word_count as f64) - 15.59;
    let fk_grade = fk_grade.clamp(-3.0, 30.0);

    // Piecewise mapping from grade level to score; lower grades score higher.
    let base_score: u8 = if fk_grade < 0.0 {
        95
    } else if fk_grade <= 8.0 {
        90 + ((8.0 - fk_grade) * 1.25) as u8
    } else if fk_grade <= 12.0 {
        70 + ((12.0 - fk_grade) * 5.0) as u8
    } else if fk_grade <= 16.0 {
        50 + ((16.0 - fk_grade) * 5.0) as u8
    } else {
        (50.0 - (fk_grade - 16.0) * 3.0).clamp(0.0, 49.0) as u8
    };
    let diversity_penalty: u8 = if vocabulary_diversity < 0.3 { 15 } else { 0 };

    ReadabilityScore {
        score: base_score.saturating_sub(diversity_penalty),
        flesch_kincaid_grade: (fk_grade * 100.0).round() / 100.0,
        avg_sentence_length: (avg_sentence_length * 100.0).round() / 100.0,
        avg_word_length: (avg_word_length * 100.0).round() / 100.0,
        vocabulary_diversity: (vocabulary_diversity * 1000.0).round() / 1000.0,
    }
}
/// Scores how well a note is structured.
///
/// Points are awarded independently for an H1 title (20), heading count
/// (8 for one, 15 for two or more), a valid heading hierarchy (10),
/// frontmatter (20), tags (8 for one, 15 for two or more) and links
/// (10 for one or two, 20 for three or more), for a maximum of 100.
fn compute_structure(vault_file: &VaultFile) -> StructureScore {
    let heading_count = vault_file.headings.len();
    let tag_count = vault_file.tags.len();
    let link_count = vault_file.links.len();

    let has_title = vault_file.headings.iter().any(|h| h.level == 1);
    let has_frontmatter = vault_file.frontmatter.is_some();
    let has_links = link_count > 0;
    let heading_hierarchy_valid = check_heading_hierarchy(&vault_file.headings);

    let title_points: u8 = if has_title { 20 } else { 0 };
    let heading_points: u8 = match heading_count {
        0 => 0,
        1 => 8,
        _ => 15,
    };
    let hierarchy_points: u8 = if heading_hierarchy_valid { 10 } else { 0 };
    let frontmatter_points: u8 = if has_frontmatter { 20 } else { 0 };
    let tag_points: u8 = match tag_count {
        0 => 0,
        1 => 8,
        _ => 15,
    };
    let link_points: u8 = match link_count {
        0 => 0,
        1 | 2 => 10,
        _ => 20,
    };

    StructureScore {
        score: title_points
            + heading_points
            + hierarchy_points
            + frontmatter_points
            + tag_points
            + link_points,
        has_title,
        heading_count,
        has_frontmatter,
        tag_count,
        has_links,
        link_count,
        heading_hierarchy_valid,
    }
}
/// Checks that headings start at level 1 and never jump more than one level deeper.
///
/// Moving back up to any shallower level is always allowed. An empty slice is
/// considered valid.
fn check_heading_hierarchy(headings: &[turbovault_core::Heading]) -> bool {
    // 0 means "no heading seen yet".
    let mut previous = 0;
    for level in headings.iter().map(|h| h.level) {
        let skips_level = if previous == 0 {
            // The first heading must be a top-level one.
            level > 1
        } else {
            level > previous + 1 && previous > 0
        };
        if skips_level {
            return false;
        }
        previous = level;
    }
    true
}
/// Scores how complete a note is.
///
/// Points are awarded for length (8 / 15 / 25 at 30 / 100 / 300 words), link
/// density in links per 100 words (5 for any link, 12 at 0.5, 20 at 1.0),
/// frontmatter (10, or 20 with at least three keys), tags (15) and
/// connectivity (10 for links in one direction, 20 for both), for a maximum
/// of 100. A failed backlink lookup is treated as "no incoming links".
async fn compute_completeness(
    vault_file: &VaultFile,
    manager: &VaultManager,
    file_path: &Path,
) -> CompletenessScore {
    let word_count = vault_file.content.split_whitespace().count();
    let link_count = vault_file.links.len();
    let link_density = match word_count {
        0 => 0.0,
        words => (link_count as f64 / words as f64) * 100.0,
    };

    let has_tags = !vault_file.tags.is_empty();
    let has_frontmatter = vault_file.frontmatter.is_some();
    let frontmatter_keys = vault_file
        .frontmatter
        .as_ref()
        .map_or(0, |fm| fm.data.len());

    let has_outgoing_links = link_count > 0;
    let has_incoming_links = manager
        .get_backlinks(file_path)
        .await
        .map_or(false, |backlinks| !backlinks.is_empty());

    let length_points: u8 = if word_count >= 300 {
        25
    } else if word_count >= 100 {
        15
    } else if word_count >= 30 {
        8
    } else {
        0
    };
    let density_points: u8 = if link_density >= 1.0 {
        20
    } else if link_density >= 0.5 {
        12
    } else if link_count > 0 {
        5
    } else {
        0
    };
    let frontmatter_points: u8 = match (has_frontmatter, frontmatter_keys >= 3) {
        (true, true) => 20,
        (true, false) => 10,
        (false, _) => 0,
    };
    let tag_points: u8 = if has_tags { 15 } else { 0 };
    let connectivity_points: u8 = match (has_outgoing_links, has_incoming_links) {
        (true, true) => 20,
        (false, false) => 0,
        _ => 10,
    };

    CompletenessScore {
        score: length_points
            + density_points
            + frontmatter_points
            + tag_points
            + connectivity_points,
        word_count,
        link_density: (link_density * 100.0).round() / 100.0,
        has_tags,
        has_frontmatter,
        frontmatter_keys,
        has_outgoing_links,
        has_incoming_links,
    }
}
/// Scores how fresh a note is.
///
/// The base score steps down with the number of whole days since the last
/// modification (100 for today, 90 within a week, 75 within 30 days, 55
/// within 90, 35 within 180, 20 within 365, otherwise 10). Two points are
/// then deducted for every forward-linked note that was modified more
/// recently, counting at most 20 such notes. Linked notes that cannot be
/// parsed, and a failed forward-link lookup, are ignored.
async fn compute_staleness(
    vault_file: &VaultFile,
    manager: &VaultManager,
    file_path: &Path,
) -> StalenessScore {
    const SECS_PER_DAY: f64 = 86400.0;

    let now = current_timestamp();
    let modified_at = vault_file.metadata.modified_at;
    let created_at = vault_file.metadata.created_at;

    // Timestamps in the future are floored to zero days.
    let modified_age_secs = now - modified_at;
    let created_age_secs = now - created_at;
    let days_since_modified = (modified_age_secs / SECS_PER_DAY).max(0.0) as u64;
    let days_since_created = (created_age_secs / SECS_PER_DAY).max(0.0) as u64;

    let mut linked_notes_newer = 0usize;
    if let Ok(forward) = manager.get_forward_links(file_path).await {
        for linked_path in &forward {
            let Ok(linked_file) = manager.parse_file(linked_path).await else {
                continue;
            };
            if linked_file.metadata.modified_at > modified_at {
                linked_notes_newer += 1;
            }
        }
    }

    let base_score: u8 = match days_since_modified {
        0 => 100,
        1..=7 => 90,
        8..=30 => 75,
        31..=90 => 55,
        91..=180 => 35,
        181..=365 => 20,
        _ => 10,
    };
    // Capping the count at 20 keeps the penalty within `u8` (maximum 40).
    let newer_penalty: u8 = (linked_notes_newer.min(20) as u8) * 2;

    StalenessScore {
        score: base_score.saturating_sub(newer_penalty),
        days_since_modified,
        days_since_created,
        linked_notes_newer,
    }
}
/// Builds up to five improvement suggestions for a note.
///
/// Candidates are checked in a fixed order (readability, structure,
/// completeness, staleness) and the first five that apply are returned.
/// Readability suggestions are only considered when the readability score is
/// below 50.
fn generate_recommendations(
    readability: &ReadabilityScore,
    structure: &StructureScore,
    completeness: &CompletenessScore,
    staleness: &StalenessScore,
) -> Vec<String> {
    let hard_to_read = readability.score < 50;

    // Each entry is `Some(message)` when its condition holds; order is significant.
    let candidates: Vec<Option<String>> = vec![
        (hard_to_read && readability.avg_sentence_length > 25.0)
            .then(|| "Break up long sentences for better readability".to_string()),
        (hard_to_read && readability.vocabulary_diversity < 0.4)
            .then(|| "Diversify vocabulary — text may be repetitive".to_string()),
        (hard_to_read && readability.flesch_kincaid_grade > 14.0)
            .then(|| "Simplify language — reading level is very high".to_string()),
        (!structure.has_title).then(|| "Add a H1 title heading".to_string()),
        (!structure.has_frontmatter).then(|| "Add YAML frontmatter with metadata".to_string()),
        (structure.tag_count == 0).then(|| "Add tags for discoverability".to_string()),
        (!structure.heading_hierarchy_valid).then(|| {
            "Fix heading hierarchy — avoid skipping levels (e.g., H1 directly to H3)".to_string()
        }),
        (completeness.word_count < 50)
            .then(|| "Note is very short — consider expanding content".to_string()),
        (!completeness.has_outgoing_links).then(|| {
            "Add outgoing links to connect this note to related topics".to_string()
        }),
        (!completeness.has_incoming_links).then(|| {
            "This note has no backlinks — link to it from related notes".to_string()
        }),
        (staleness.days_since_modified > 180).then(|| {
            format!(
                "Note hasn't been updated in {} days — review for accuracy",
                staleness.days_since_modified
            )
        }),
        (staleness.linked_notes_newer > 3).then(|| {
            format!(
                "{} linked notes have been updated more recently — this note may be outdated",
                staleness.linked_notes_newer
            )
        }),
    ];

    candidates.into_iter().flatten().take(5).collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks the syllable heuristic on short, common words.
    #[test]
    fn test_count_syllables() {
        let expectations = [("the", 1), ("hello", 2), ("beautiful", 3), ("a", 1)];
        for (word, expected) in expectations {
            assert_eq!(count_syllables(word), expected);
        }
    }

    /// Empty input must produce a zero readability score.
    #[test]
    fn test_readability_empty() {
        assert_eq!(compute_readability("").score, 0);
    }

    /// Short, simple sentences should land in the upper half of the scale.
    #[test]
    fn test_readability_simple() {
        let result = compute_readability("The cat sat on the mat. The dog ran in the park.");
        assert!(
            result.score > 50,
            "Simple text should score well: {}",
            result.score
        );
        assert!(result.avg_sentence_length < 15.0);
    }

    /// Equal dimension scores must yield that same overall score.
    #[test]
    fn test_weighted_score() {
        for uniform in [80u8, 100, 0] {
            assert_eq!(
                weighted_score(uniform, uniform, uniform, uniform),
                uniform
            );
        }
    }

    /// A sequential H1 → H2 → H3 outline is valid; H1 → H3 skips a level.
    #[test]
    fn test_heading_hierarchy() {
        use turbovault_core::{Heading, SourcePosition};

        let pos = SourcePosition::new(0, 0, 0, 0);
        let heading = |text: &'static str, level| Heading {
            text: text.into(),
            level,
            position: pos,
            anchor: None,
        };

        let good = vec![heading("H1", 1), heading("H2", 2), heading("H3", 3)];
        assert!(check_heading_hierarchy(&good));

        let bad = vec![heading("H1", 1), heading("H3", 3)];
        assert!(!check_heading_hierarchy(&bad));
    }
}