/// Collapses redundant whitespace in text while keeping paragraph breaks.
///
/// When constructed with `preserve_layout_mode` set to `true` the normalizer
/// is a pass-through: [`normalize`](Self::normalize) returns its input
/// unchanged.
#[derive(Debug, Clone)]
pub struct WhitespaceNormalizer {
    /// When `true`, `normalize` returns the input verbatim.
    preserve_layout_mode: bool,
}

impl WhitespaceNormalizer {
    /// Creates a normalizer.
    ///
    /// Pass `true` for `preserve_layout_mode` to disable normalization
    /// entirely, or `false` to collapse whitespace.
    pub fn new(preserve_layout_mode: bool) -> Self {
        Self {
            preserve_layout_mode,
        }
    }

    /// Normalizes the whitespace in `text` and returns the result.
    ///
    /// In preserve-layout mode the input is returned as an owned copy with no
    /// changes. Otherwise:
    ///
    /// * `"\r\n"` line endings are treated as `"\n"`;
    /// * the text is split on every `"\n\n"`, each piece is normalized on its
    ///   own, and the pieces are re-joined with `"\n\n"`;
    /// * within a piece, every line is trimmed, each run of whitespace inside
    ///   a line becomes a single space, and lines that end up empty are
    ///   dropped; the remaining lines are joined with `"\n"`.
    pub fn normalize(&self, text: &str) -> String {
        if self.preserve_layout_mode {
            return text.to_string();
        }

        // `str::lines` accepts "\r\n" as a line ending, but the paragraph
        // split below only matches "\n\n". Without this step a CRLF blank
        // line ("\r\n\r\n") would not count as a paragraph break and the
        // neighbouring paragraphs would be merged. Allocate only when the
        // input actually contains a CRLF.
        let text: std::borrow::Cow<'_, str> = if text.contains("\r\n") {
            std::borrow::Cow::Owned(text.replace("\r\n", "\n"))
        } else {
            std::borrow::Cow::Borrowed(text)
        };

        text.split("\n\n")
            .map(|para| self.normalize_paragraph(para))
            .collect::<Vec<_>>()
            .join("\n\n")
    }

    /// Normalizes every line of one paragraph and joins the non-empty results
    /// with `"\n"`.
    fn normalize_paragraph(&self, text: &str) -> String {
        text.lines()
            .map(|line| self.normalize_line(line))
            // Lines that were empty or whitespace-only normalize to "".
            .filter(|line| !line.is_empty())
            .collect::<Vec<_>>()
            .join("\n")
    }

    /// Trims `line` and replaces each internal run of whitespace with one
    /// space. Returns an empty string for an empty or whitespace-only line.
    fn normalize_line(&self, line: &str) -> String {
        // `split_whitespace` skips leading/trailing whitespace and treats any
        // run of Unicode whitespace as a single separator.
        line.split_whitespace().collect::<Vec<_>>().join(" ")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single interior space is already normalized and must be kept as is.
    #[test]
    fn test_normalize_single_space() {
        let normalizer = WhitespaceNormalizer::new(false);
        assert_eq!(normalizer.normalize("hello world"), "hello world");
    }

    /// A run of several spaces collapses to exactly one space.
    #[test]
    fn test_normalize_multiple_spaces() {
        let normalizer = WhitespaceNormalizer::new(false);
        assert_eq!(normalizer.normalize("hello    world"), "hello world");
    }

    /// Tabs count as whitespace and collapse to a single space.
    #[test]
    fn test_normalize_tabs() {
        let normalizer = WhitespaceNormalizer::new(false);
        assert_eq!(normalizer.normalize("hello\t\tworld"), "hello world");
    }

    /// Leading and trailing whitespace on a line is removed.
    #[test]
    fn test_normalize_trims_line_edges() {
        let normalizer = WhitespaceNormalizer::new(false);
        assert_eq!(normalizer.normalize("  hello world \t"), "hello world");
    }

    /// A blank line between paragraphs survives normalization.
    #[test]
    fn test_normalize_keeps_paragraph_break() {
        let normalizer = WhitespaceNormalizer::new(false);
        assert_eq!(normalizer.normalize("a  b\n\nc\t d"), "a b\n\nc d");
    }

    /// Lines containing only whitespace are dropped from a paragraph.
    #[test]
    fn test_normalize_drops_whitespace_only_lines() {
        let normalizer = WhitespaceNormalizer::new(false);
        assert_eq!(normalizer.normalize("a\n   \nb"), "a\nb");
    }

    /// Preserve-layout mode returns the input untouched, including whitespace
    /// that normalization would otherwise collapse or trim.
    #[test]
    fn test_preserve_layout_mode() {
        let normalizer = WhitespaceNormalizer::new(true);
        let text = "  hello \t  world \n\n\n";
        assert_eq!(normalizer.normalize(text), text);
    }
}