/// Returns `true` when `c` should be treated as blank space by the bold
/// validator.
///
/// Besides everything `char::is_whitespace` accepts, this explicitly accepts
/// the no-break space (U+00A0), figure space (U+2007), narrow no-break space
/// (U+202F), ideographic space (U+3000) and the zero-width no-break space /
/// byte-order mark (U+FEFF).
fn is_any_whitespace(c: char) -> bool {
    // The explicit list is tested first; anything not listed falls back to
    // the standard-library classification.
    matches!(
        c,
        '\u{00A0}' | '\u{2007}' | '\u{202F}' | '\u{3000}' | '\u{FEFF}'
    ) || c.is_whitespace()
}
/// Outcome of checking whether a group may be wrapped in bold markers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BoldMarkerDecision {
    /// The group passed every check; markers may be emitted around it.
    Insert,
    /// The group failed a check; the payload records which one.
    Skip(ValidatorError),
}
/// Reason a group was refused bold markers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidatorError {
    /// The group's text contains nothing but whitespace.
    WhitespaceOnly,
    /// The character at the opening edge is missing or not alphanumeric.
    InvalidOpeningBoundary,
    /// The character at the closing edge is missing or not alphanumeric.
    InvalidClosingBoundary,
    /// The formatted content is empty once surrounding whitespace is trimmed.
    EmptyAfterFormatting,
    /// The group is not flagged as bold in the first place.
    NotBold,
}
/// A run of text considered as a single unit for bold formatting.
///
/// The edge characters are stored separately from `text` and are supplied by
/// whoever builds the group; nothing in this type derives them from `text`.
#[derive(Debug, Clone)]
pub struct BoldGroup {
    /// Raw text of the group.
    pub text: String,
    /// Whether the group is flagged as bold.
    pub is_bold: bool,
    /// Character examined by the opening-boundary check, if any.
    pub first_char_in_group: Option<char>,
    /// Character examined by the closing-boundary check, if any.
    pub last_char_in_group: Option<char>,
}
impl BoldGroup {
pub fn has_word_content(&self) -> bool {
self.text.chars().any(|c| !is_any_whitespace(c))
}
pub fn has_valid_opening_boundary(&self) -> bool {
match self.first_char_in_group {
Some(c) => {
let is_word_char = c.is_alphabetic() || c.is_numeric();
let is_not_whitespace = !is_any_whitespace(c);
is_word_char && is_not_whitespace
},
None => false,
}
}
pub fn has_valid_closing_boundary(&self) -> bool {
match self.last_char_in_group {
Some(c) => {
let is_word_char = c.is_alphabetic() || c.is_numeric();
let is_not_whitespace = !is_any_whitespace(c);
is_word_char && is_not_whitespace
},
None => false,
}
}
pub fn simulated_formatted_content(&self) -> String {
self.text.clone()
}
}
pub struct BoldMarkerValidator;
impl BoldMarkerValidator {
pub fn validate_boundary_context(
preceding_text: &str,
_group_text: &str,
following_text: &str,
) -> bool {
let has_space_before = preceding_text.ends_with(' ')
|| preceding_text.ends_with('\n')
|| preceding_text.is_empty();
let has_space_after = following_text.starts_with(' ')
|| following_text.starts_with('\n')
|| following_text.is_empty();
has_space_before && has_space_after
}
pub fn can_insert_markers(group: &BoldGroup) -> BoldMarkerDecision {
if !group.is_bold {
log::debug!(
"Rejecting bold markers: not marked bold for '{}'",
group.text.chars().take(20).collect::<String>()
);
return BoldMarkerDecision::Skip(ValidatorError::NotBold);
}
if !group.has_word_content() {
log::debug!(
"Rejecting bold markers: no word content in '{}'",
group.text.chars().take(20).collect::<String>()
);
return BoldMarkerDecision::Skip(ValidatorError::WhitespaceOnly);
}
if !group.has_valid_opening_boundary() {
log::debug!(
"Rejecting bold markers: invalid opening boundary '{}' in '{}'",
group.first_char_in_group.unwrap_or('?'),
group.text.chars().take(20).collect::<String>()
);
return BoldMarkerDecision::Skip(ValidatorError::InvalidOpeningBoundary);
}
if !group.has_valid_closing_boundary() {
log::debug!(
"Rejecting bold markers: invalid closing boundary '{}' in '{}'",
group.last_char_in_group.unwrap_or('?'),
group.text.chars().take(20).collect::<String>()
);
return BoldMarkerDecision::Skip(ValidatorError::InvalidClosingBoundary);
}
let formatted = group.simulated_formatted_content();
if formatted.trim().is_empty() {
log::debug!("Rejecting bold markers: content became empty after formatting");
return BoldMarkerDecision::Skip(ValidatorError::EmptyAfterFormatting);
}
BoldMarkerDecision::Insert
}
pub fn validate_group_sequence(groups: &[BoldGroup]) -> Result<(), String> {
for (idx, group) in groups.iter().enumerate() {
match Self::can_insert_markers(group) {
BoldMarkerDecision::Skip(err) if group.is_bold => {
log::warn!(
"Group {}: {:?}: '{}'",
idx,
err,
group.text.chars().take(20).collect::<String>()
);
},
_ => {},
}
}
Ok(())
}
pub fn predict_markdown(group: &BoldGroup) -> String {
match Self::can_insert_markers(group) {
BoldMarkerDecision::Insert => {
format!("**{}**", group.text)
},
BoldMarkerDecision::Skip(_) => group.text.clone(),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a group flagged as bold with both edge characters present.
    fn bold(text: &str, first: char, last: char) -> BoldGroup {
        BoldGroup {
            text: text.to_string(),
            is_bold: true,
            first_char_in_group: Some(first),
            last_char_in_group: Some(last),
        }
    }

    /// Shorthand for the decision expected when a check fails.
    fn skip(err: ValidatorError) -> BoldMarkerDecision {
        BoldMarkerDecision::Skip(err)
    }

    #[test]
    fn test_valid_bold_group() {
        let group = bold("hello", 'h', 'o');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&group),
            BoldMarkerDecision::Insert
        );
    }

    #[test]
    fn test_whitespace_only_group() {
        let group = bold(" ", ' ', ' ');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&group),
            skip(ValidatorError::WhitespaceOnly)
        );
    }

    #[test]
    fn test_invalid_opening_boundary() {
        let group = bold("hello", ' ', 'o');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&group),
            skip(ValidatorError::InvalidOpeningBoundary)
        );
    }

    #[test]
    fn test_invalid_closing_boundary() {
        let group = bold("hello", 'h', ' ');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&group),
            skip(ValidatorError::InvalidClosingBoundary)
        );
    }

    #[test]
    fn test_predict_markdown() {
        let valid = bold("hello", 'h', 'o');
        assert_eq!(BoldMarkerValidator::predict_markdown(&valid), "**hello**");

        let whitespace = bold(" ", ' ', ' ');
        assert_eq!(BoldMarkerValidator::predict_markdown(&whitespace), " ");
    }

    #[test]
    fn test_bold_respects_word_boundaries() {
        assert!(BoldMarkerValidator::validate_boundary_context("word ", "bold", " text"));
        assert!(BoldMarkerValidator::validate_boundary_context("", "bold", " text"));
        assert!(BoldMarkerValidator::validate_boundary_context("word ", "bold", ""));
        assert!(BoldMarkerValidator::validate_boundary_context("", "bold", ""));
    }

    #[test]
    fn test_bold_between_spaces() {
        assert!(!BoldMarkerValidator::validate_boundary_context("the", "Bold", " word"));
        assert!(!BoldMarkerValidator::validate_boundary_context("the ", "bold", "ness"));
        assert!(!BoldMarkerValidator::validate_boundary_context("the", "Bold", "ness"));
    }

    #[test]
    fn test_camelcase_split_not_bolded_individually() {
        assert!(BoldMarkerValidator::validate_boundary_context("the ", "General", ""));
        assert!(!BoldMarkerValidator::validate_boundary_context("the", "General", ""));
    }

    #[test]
    fn test_newline_as_boundary() {
        assert!(BoldMarkerValidator::validate_boundary_context("text\n", "bold", " more"));
        assert!(BoldMarkerValidator::validate_boundary_context("text ", "bold", "\n"));
        assert!(BoldMarkerValidator::validate_boundary_context("text\n", "bold", "\n"));
    }

    #[test]
    fn test_punctuation_not_bolded() {
        let punct_group = bold("---", '-', '-');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&punct_group),
            skip(ValidatorError::InvalidOpeningBoundary)
        );
    }

    #[test]
    fn test_numeric_content_can_be_bold() {
        let num_group = bold("2024", '2', '4');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&num_group),
            BoldMarkerDecision::Insert
        );
    }

    #[test]
    fn test_alphanumeric_mixed_content() {
        let mixed_group = bold("version2024", 'v', '4');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&mixed_group),
            BoldMarkerDecision::Insert
        );
    }

    #[test]
    fn test_no_empty_bold_markers_regression() {
        let empty_group = bold(" ", ' ', ' ');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&empty_group),
            skip(ValidatorError::WhitespaceOnly)
        );
        assert_eq!(BoldMarkerValidator::predict_markdown(&empty_group), " ");
    }

    #[test]
    fn test_fix_2b_nbsp_treated_as_whitespace() {
        let nbsp_group = bold("\u{00A0}hello", '\u{00A0}', 'o');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&nbsp_group),
            skip(ValidatorError::InvalidOpeningBoundary)
        );
    }

    #[test]
    fn test_fix_2b_figure_space_treated_as_whitespace() {
        let fig_space_group = bold("hello\u{2007}", 'h', '\u{2007}');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&fig_space_group),
            skip(ValidatorError::InvalidClosingBoundary)
        );
    }

    #[test]
    fn test_fix_2b_narrow_nbsp_treated_as_whitespace() {
        // The narrow no-break space is interior here, so both edges are valid.
        let narrow_nbsp_group = bold("hello\u{202F}world", 'h', 'd');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&narrow_nbsp_group),
            BoldMarkerDecision::Insert
        );
    }

    #[test]
    fn test_fix_2b_ideographic_space_treated_as_whitespace() {
        let ideo_space_group = bold("hello\u{3000}", 'h', '\u{3000}');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&ideo_space_group),
            skip(ValidatorError::InvalidClosingBoundary)
        );
    }

    #[test]
    fn test_fix_2b_unicode_bom_treated_as_whitespace() {
        let bom_group = bold("\u{FEFF}hello", '\u{FEFF}', 'o');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&bom_group),
            skip(ValidatorError::InvalidOpeningBoundary)
        );
    }

    #[test]
    fn test_fix_2b_has_word_content_with_unicode_whitespace() {
        let nbsp_only = bold("\u{00A0}\u{00A0}", '\u{00A0}', '\u{00A0}');
        assert!(!nbsp_only.has_word_content());

        let nbsp_mixed = bold("\u{00A0}hello\u{00A0}", '\u{00A0}', '\u{00A0}');
        assert!(nbsp_mixed.has_word_content());

        let fig_mixed = bold("\u{2007}world\u{2007}", '\u{2007}', '\u{2007}');
        assert!(fig_mixed.has_word_content());
    }

    #[test]
    fn test_fix_2b_no_empty_markers_with_unicode_spaces() {
        // The word-content check runs before the boundary checks, so text made
        // only of Unicode spaces is reported as `WhitespaceOnly`.
        let unicode_only = bold("\u{00A0}\u{2007}\u{202F}\u{3000}", '\u{00A0}', '\u{3000}');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&unicode_only),
            skip(ValidatorError::WhitespaceOnly)
        );
        assert_eq!(
            BoldMarkerValidator::predict_markdown(&unicode_only),
            unicode_only.text
        );

        // Edge characters are supplied separately from the text, so padded
        // text with word-character edges is still accepted.
        let valid_with_unicode = bold("\u{00A0}hello\u{00A0}", 'h', 'o');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&valid_with_unicode),
            BoldMarkerDecision::Insert
        );
    }

    #[test]
    fn test_fix_2b_policy_pdf_scenario() {
        let policy_text = bold("Policy\u{00A0}", 'P', '\u{00A0}');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&policy_text),
            skip(ValidatorError::InvalidClosingBoundary)
        );
    }

    #[test]
    fn test_fix_2b_combined_with_ascii_whitespace() {
        let combined = bold(" \u{00A0}text\u{00A0} ", ' ', ' ');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&combined),
            skip(ValidatorError::InvalidOpeningBoundary)
        );
        assert!(combined.has_word_content());
    }

    #[test]
    fn test_fix_2b_unicode_space_in_middle_allowed() {
        let internal_space = bold("hello\u{00A0}world", 'h', 'd');
        assert_eq!(
            BoldMarkerValidator::can_insert_markers(&internal_space),
            BoldMarkerDecision::Insert
        );
        assert_eq!(
            BoldMarkerValidator::predict_markdown(&internal_space),
            "**hello\u{00A0}world**"
        );
    }
}