use std::borrow::Cow;
use std::sync::LazyLock;
use crate::Lrc;
use crate::Token;
use crate::TokenKind;
use hashbrown::HashSet;
use crate::Punctuation;
use crate::spell::Dictionary;
use crate::{CharStringExt, Document, TokenStringExt, parsers::Parser};
/// Title-cases a string slice and returns the result as an owned `String`.
///
/// The input is collected into a shared character buffer, handed to
/// [`make_title_case_chars`], and the resulting characters are turned back
/// into a `String`.
///
/// * `source` - the text to convert.
/// * `parser` - parser used to tokenize `source`.
/// * `dict` - dictionary consulted while tokenizing and capitalizing.
pub fn make_title_case_str(source: &str, parser: &impl Parser, dict: &impl Dictionary) -> String {
    let shared_chars: Lrc<[char]> = source.chars().collect();
    let title_cased = make_title_case_chars(shared_chars, parser, dict);
    title_cased.to_string()
}
/// Title-cases a shared character buffer.
///
/// Builds a [`Document`] from `source` with the given `parser` and `dict`,
/// then runs [`make_title_case`] over the document's tokens.
///
/// Returns the title-cased characters as a new `Vec<char>`.
pub fn make_title_case_chars(
    source: Lrc<[char]>,
    parser: &impl Parser,
    dict: &impl Dictionary,
) -> Vec<char> {
    // The document takes its own handle to the buffer; `source` is still
    // needed afterwards to resolve token spans back into characters.
    let doc = Document::new_from_chars(source.clone(), parser, dict);
    let tokens = doc.get_tokens();
    make_title_case(tokens, &source, dict)
}
/// Attempts to rewrite the text covered by `toks` in title case.
///
/// Returns `None` when `toks` is empty or when the text already matches the
/// title-cased form, so callers can tell "nothing to change" apart from an
/// actual rewrite. Otherwise returns the rewritten characters for the span
/// covered by `toks`.
///
/// For every word-like token, in order:
///
/// * If the token is a word flagged as a proper noun and `dict` knows a
///   canonical capitalization for it, the alphabetic characters of that
///   capitalization are copied over (non-alphabetic characters are left
///   untouched).
/// * The first character is ASCII-uppercased when any of these holds:
///   a colon token appears between the previous word-like token and this
///   one, [`should_capitalize_token`] approves, this is the first word-like
///   token containing an alphabetic character, or it is the last word-like
///   token.
/// * Otherwise every character of the token is ASCII-lowercased.
///
/// * `toks` - the tokens to title-case.
/// * `source` - the character buffer the token spans index into.
/// * `dict` - dictionary used to look up canonical capitalizations.
pub fn try_make_title_case(
    toks: &[Token],
    source: &[char],
    dict: &impl Dictionary,
) -> Option<Vec<char>> {
    // `first()` is `None` exactly when there are no tokens, so this doubles
    // as the empty-input check.
    let base = toks.first()?.span.start;
    let original = toks.span().unwrap().get_content(source);

    // The edit buffer is only allocated once a character actually has to
    // differ from the original text.
    let mut edited: Option<Vec<char>> = None;
    let mut write_char = |pos: usize, ch: char| {
        let needs_write = match &edited {
            Some(buf) => buf[pos] != ch || original[pos] != ch,
            None => original[pos] != ch,
        };
        if needs_write {
            edited.get_or_insert_with(|| original.to_vec())[pos] = ch;
        }
    };

    let mut word_indices = toks.iter_word_like_indices().peekable();
    let mut prev_word_idx = 0;
    let mut seen_alphabetic_word = false;

    while let Some(word_idx) = word_indices.next() {
        let word = &toks[word_idx];
        let word_chars = word.get_ch(source);
        let is_alphabetic_word = word_chars.iter().any(|c| c.is_alphabetic());

        // Proper nouns first receive the dictionary's capitalization.
        if let Some(Some(metadata)) = word.kind.as_word()
            && metadata.is_proper_noun()
            && let Some(correct_caps) = dict.get_correct_capitalization_of(word_chars)
        {
            for (i, c) in correct_caps.iter().enumerate() {
                if c.is_alphabetic() {
                    write_char(word.span.start - base + i, *c);
                }
            }
        }

        // Scan from the previous word-like token (index 0 on the first
        // iteration) up to, but excluding, the current one.
        let is_after_colon = toks[prev_word_idx..word_idx]
            .iter()
            .any(|tok| matches!(tok.kind, TokenKind::Punctuation(Punctuation::Colon)));

        let should_capitalize = is_after_colon
            || should_capitalize_token(word, source)
            || (is_alphabetic_word && !seen_alphabetic_word)
            || word_indices.peek().is_none();

        if should_capitalize {
            let head = word.span.start - base;
            write_char(head, original[head].to_ascii_uppercase());
        } else {
            for abs in word.span {
                let pos = abs - base;
                write_char(pos, original[pos].to_ascii_lowercase());
            }
        }

        seen_alphabetic_word |= is_alphabetic_word;
        prev_word_idx = word_idx;
    }

    // A buffer that ended up identical to the original counts as "no change".
    edited.filter(|buf| buf.as_slice() != original)
}
/// Title-cases the text covered by `toks`, always returning a buffer.
///
/// Delegates to [`try_make_title_case`]; when that reports no rewrite, the
/// original characters covered by `toks` are copied out unchanged (using the
/// default span if `toks` has no span).
pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) -> Vec<char> {
    match try_make_title_case(toks, source, dict) {
        Some(rewritten) => rewritten,
        None => toks.span().unwrap_or_default().get_content(source).to_vec(),
    }
}
/// Decides whether a token, judged on its own, should start with a capital
/// letter in a title.
///
/// Tokens that are not words carrying metadata are always approved. For a
/// word with metadata, the lowercased text is compared against a few fixed
/// lists:
///
/// * `"all"` is always capitalized.
/// * Prepositions whose span is at most four characters long are not.
/// * The conjunctions `and`, `but`, `for`, `or`, `nor`, `as` are not.
/// * The articles `a`, `an`, `the` are not.
///
/// Every other word is capitalized.
fn should_capitalize_token(tok: &Token, source: &[char]) -> bool {
    static SPECIAL_CONJUNCTIONS: LazyLock<HashSet<Vec<char>>> = LazyLock::new(|| {
        ["and", "but", "for", "or", "nor", "as"]
            .into_iter()
            .map(|word| word.chars().collect())
            .collect()
    });
    static SPECIAL_ARTICLES: LazyLock<HashSet<Vec<char>>> = LazyLock::new(|| {
        ["a", "an", "the"]
            .into_iter()
            .map(|word| word.chars().collect())
            .collect()
    });

    let TokenKind::Word(Some(metadata)) = &tok.kind else {
        return true;
    };

    let chars = tok.get_ch(source);
    let lowered = chars.to_lower();
    let word: &[char] = lowered.as_ref();

    if word == ['a', 'l', 'l'] {
        return true;
    }

    // Borrowed view of the metadata; nothing in this function takes ownership.
    let metadata = Cow::Borrowed(metadata);
    let is_short_preposition = metadata.preposition && tok.span.len() <= 4;

    !(is_short_preposition
        || SPECIAL_CONJUNCTIONS.contains(word)
        || SPECIAL_ARTICLES.contains(word))
}
#[cfg(test)]
mod tests {
    use quickcheck::TestResult;
    use quickcheck_macros::quickcheck;

    use super::make_title_case_str;
    use crate::parsers::{Markdown, PlainEnglish};
    use crate::spell::FstDictionary;

    /// Title-cases `input` with the plain-English parser and the curated dictionary.
    fn plain(input: &str) -> String {
        make_title_case_str(input, &PlainEnglish, &FstDictionary::curated())
    }

    /// Title-cases `input` with the default Markdown parser and the curated dictionary.
    fn markdown(input: &str) -> String {
        make_title_case_str(input, &Markdown::default(), &FstDictionary::curated())
    }

    #[test]
    fn normal() {
        assert_eq!(plain("this is a test"), "This Is a Test");
    }

    #[test]
    fn complex() {
        assert_eq!(
            plain("the first and last words should be capitalized, even if it is \"the\""),
            "The First and Last Words Should Be Capitalized, Even If It Is \"The\""
        );
    }

    #[test]
    fn about_uppercase_with_numbers() {
        assert_eq!(plain("0 about 0"), "0 About 0");
    }

    #[test]
    fn pipe_does_not_cause_crash() {
        assert_eq!(markdown("|"), "|");
    }

    #[test]
    fn a_paragraph_does_not_cause_crash() {
        assert_eq!(markdown("A\n"), "A");
    }

    #[test]
    fn tab_a_becomes_upcase() {
        assert_eq!(plain("\ta"), "\tA");
    }

    #[test]
    fn fixes_video_press() {
        assert_eq!(plain("videopress"), "VideoPress");
    }

    #[quickcheck]
    fn a_stays_lowercase(prefix: String, postfix: String) -> TestResult {
        // Only non-empty, purely ASCII-alphabetic neighbours are meaningful here.
        let usable = |s: &str| !s.is_empty() && s.chars().all(|c| c.is_ascii_alphabetic());
        if !usable(&prefix) || !usable(&postfix) {
            return TestResult::discard();
        }

        let title_case: Vec<char> = markdown(&format!("{prefix} a {postfix}"))
            .chars()
            .collect();

        // The article sits right after the prefix and one space.
        TestResult::from_bool(title_case[prefix.chars().count() + 1] == 'a')
    }

    #[quickcheck]
    fn about_becomes_uppercase(prefix: String, postfix: String) -> TestResult {
        // Only non-empty, purely ASCII-alphanumeric neighbours are meaningful here.
        let usable = |s: &str| !s.is_empty() && s.chars().all(|c| c.is_ascii_alphanumeric());
        if !usable(&prefix) || !usable(&postfix) {
            return TestResult::discard();
        }

        let title_case: Vec<char> = markdown(&format!("{prefix} about {postfix}"))
            .chars()
            .collect();

        // "about" starts right after the prefix and one space.
        TestResult::from_bool(title_case[prefix.chars().count() + 1] == 'A')
    }

    #[quickcheck]
    fn first_word_is_upcase(text: String) -> TestResult {
        match plain(&text).chars().next() {
            Some(first) if first.is_ascii_alphabetic() => {
                TestResult::from_bool(first.is_ascii_uppercase())
            }
            _ => TestResult::discard(),
        }
    }

    #[test]
    fn united_states() {
        assert_eq!(plain("united states"), "United States");
    }

    #[test]
    fn keeps_decimal() {
        assert_eq!(plain("harper turns 1.0 today"), "Harper Turns 1.0 Today");
    }

    #[test]
    fn fixes_odd_capitalized_proper_nouns() {
        assert_eq!(
            plain("i spoke at wordcamp u.s. in 2025"),
            "I Spoke at WordCamp U.S. in 2025",
        );
    }

    #[test]
    fn fixes_your_correctly() {
        assert_eq!(plain("it is not your friend"), "It Is Not Your Friend");
    }

    #[test]
    fn handles_old_man_and_the_sea() {
        assert_eq!(plain("the old man and the sea"), "The Old Man and the Sea");
    }

    #[test]
    fn handles_great_story_with_subtitle() {
        assert_eq!(
            plain("the great story: a tale of two cities"),
            "The Great Story: A Tale of Two Cities",
        );
    }

    #[test]
    fn handles_lantern_and_moths() {
        assert_eq!(
            plain("lantern flickered; moths began their worship"),
            "Lantern Flickered; Moths Began Their Worship",
        );
    }

    #[test]
    fn handles_static_with_ghosts() {
        assert_eq!(
            plain("static filled the room with ghosts"),
            "Static Filled the Room with Ghosts",
        );
    }

    #[test]
    fn handles_glass_trembled_before_thunder() {
        assert_eq!(
            plain("glass trembled before thunder arrived."),
            "Glass Trembled Before Thunder Arrived.",
        );
    }

    #[test]
    fn handles_hepatitis_b_shots() {
        assert_eq!(
            plain("an end to hepatitis b shots for all newborns"),
            "An End to Hepatitis B Shots for All Newborns",
        );
    }

    #[test]
    fn handles_trump_approval_rating() {
        assert_eq!(
            plain("trump's approval rating dips as views of his handling of the economy sour"),
            "Trump's Approval Rating Dips as Views of His Handling of the Economy Sour",
        );
    }

    #[test]
    fn handles_last_door() {
        assert_eq!(plain("the last door"), "The Last Door");
    }

    #[test]
    fn handles_midnight_river() {
        assert_eq!(plain("midnight river"), "Midnight River");
    }

    #[test]
    fn handles_a_quiet_room() {
        assert_eq!(plain("a quiet room"), "A Quiet Room");
    }

    #[test]
    fn handles_broken_map() {
        assert_eq!(plain("broken map"), "Broken Map");
    }

    #[test]
    fn handles_fire_in_autumn() {
        assert_eq!(plain("fire in autumn"), "Fire in Autumn");
    }

    #[test]
    fn handles_hidden_path() {
        assert_eq!(plain("the hidden path"), "The Hidden Path");
    }

    #[test]
    fn handles_under_blue_skies() {
        assert_eq!(plain("under blue skies"), "Under Blue Skies");
    }

    #[test]
    fn handles_lost_and_found() {
        assert_eq!(plain("lost and found"), "Lost and Found");
    }

    #[test]
    fn handles_silent_watcher() {
        assert_eq!(plain("the silent watcher"), "The Silent Watcher");
    }

    #[test]
    fn handles_winter_road() {
        assert_eq!(plain("winter road"), "Winter Road");
    }

    #[test]
    fn maintains_same_apostrophe_type() {
        assert_eq!(
            plain("Alice’s Adventures in Wonderland"),
            "Alice’s Adventures in Wonderland",
        );
    }

    #[test]
    fn doesnt_lowercase_this_in_github_template_title() {
        assert_eq!(
            plain("# How Has This Been Tested?"),
            "# How Has This Been Tested?",
        );
    }
}