#[cfg(test)]
mod word_count_tests {
    //! Table-driven checks for `word_count`, grouped by the kind of input
    //! each test feeds it.

    use bt_string_utils::analyzer::word_count;

    /// Space-separated plain words are each counted once.
    #[test]
    fn basic_words() {
        for (text, expected) in [("Hello world", 2), ("One two three", 3)] {
            assert_eq!(word_count(text), expected, "input: {:?}", text);
        }
    }

    /// Punctuation, brackets, and quotes attached to a word do not add words.
    #[test]
    fn punctuation_handling() {
        let cases = [("Hello, world!", 2), ("(test)", 1), ("\"quoted\"", 1)];
        for (text, expected) in cases {
            assert_eq!(word_count(text), expected, "input: {:?}", text);
        }
    }

    /// Spaces, tabs, and newlines all separate words; leading and trailing
    /// whitespace contributes nothing.
    #[test]
    fn multiple_whitespace() {
        for (text, expected) in [("a b\tc\nd", 4), (" spaced out ", 2)] {
            assert_eq!(word_count(text), expected, "input: {:?}", text);
        }
    }

    /// A hyphenated compound is a single word.
    #[test]
    fn hyphenated_words() {
        for text in ["state-of-the-art", "mother-in-law"] {
            assert_eq!(word_count(text), 1, "input: {:?}", text);
        }
    }

    /// An apostrophe inside a contraction does not split the word.
    #[test]
    fn contractions() {
        for text in ["don't stop", "I'm here", "they're coming"] {
            assert_eq!(word_count(text), 2, "input: {:?}", text);
        }
    }

    /// A URL, with or without a scheme, is a single word.
    #[test]
    fn urls() {
        let cases = [("Visit https://example.com now", 3), ("example.com/test", 1)];
        for (text, expected) in cases {
            assert_eq!(word_count(text), expected, "input: {:?}", text);
        }
    }

    /// An emoji standing on its own is counted as a word.
    #[test]
    fn emojis() {
        for (text, expected) in [("🙂", 1), ("Hello 🙂 world", 3)] {
            assert_eq!(word_count(text), expected, "input: {:?}", text);
        }
    }

    /// Empty and whitespace-only inputs contain no words.
    #[test]
    fn empty_and_whitespace_only() {
        for text in ["", " ", "\n\t "] {
            assert_eq!(word_count(text), 0, "input: {:?}", text);
        }
    }
}
#[cfg(test)]
mod count_paragraphs_tests {
    //! Checks for `count_paragraphs` across the `\n`, `\r\n`, and `\r`
    //! line-break styles, including empty and whitespace-only paragraphs.

    use bt_string_utils::analyzer::count_paragraphs;

    /// Text without any line break is one paragraph.
    #[test]
    fn single_paragraph_no_newline() {
        let paragraphs = count_paragraphs("Hello world");
        assert_eq!(paragraphs, 1);
    }

    /// A single `\n` between two words yields two paragraphs.
    #[test]
    fn two_paragraphs_unix_newline() {
        let paragraphs = count_paragraphs("Hello\nWorld");
        assert_eq!(paragraphs, 2);
    }

    /// A `\r\n` pair acts as one break, not two.
    #[test]
    fn two_paragraphs_windows_newline() {
        let paragraphs = count_paragraphs("Hello\r\nWorld");
        assert_eq!(paragraphs, 2);
    }

    /// A bare `\r` is also a paragraph break.
    #[test]
    fn two_paragraphs_old_mac_newline() {
        let paragraphs = count_paragraphs("Hello\rWorld");
        assert_eq!(paragraphs, 2);
    }

    /// The empty string has no paragraphs.
    #[test]
    fn empty_document() {
        let paragraphs = count_paragraphs("");
        assert_eq!(paragraphs, 0);
    }

    /// A document consisting only of `\n` counts as one paragraph.
    #[test]
    fn newline_only() {
        let paragraphs = count_paragraphs("\n");
        assert_eq!(paragraphs, 1);
    }

    /// A document consisting only of `\r` counts as one paragraph.
    #[test]
    fn cr_only() {
        let paragraphs = count_paragraphs("\r");
        assert_eq!(paragraphs, 1);
    }

    /// A document consisting only of `\r\n` counts as one paragraph.
    #[test]
    fn crnl_only() {
        let paragraphs = count_paragraphs("\r\n");
        assert_eq!(paragraphs, 1);
    }

    /// A break after the last word adds a trailing empty paragraph.
    #[test]
    fn trailing_newline_creates_empty_paragraph() {
        for text in ["Hello\n", "Hello\r\n"] {
            assert_eq!(count_paragraphs(text), 2, "input: {:?}", text);
        }
    }

    /// Consecutive breaks produce empty paragraphs that are counted too.
    #[test]
    fn multiple_empty_paragraphs() {
        for (text, expected) in [("A\n\nB", 3), ("A\n\n\nB", 4)] {
            assert_eq!(count_paragraphs(text), expected, "input: {:?}", text);
        }
    }

    /// A line holding only whitespace still counts as a paragraph.
    #[test]
    fn paragraphs_with_whitespace_only_lines() {
        let paragraphs = count_paragraphs("A\n \nB");
        assert_eq!(paragraphs, 3);
    }

    /// The three break styles can be mixed within one document.
    #[test]
    fn mixed_newline_types() {
        let paragraphs = count_paragraphs("A\r\nB\nC\rD");
        assert_eq!(paragraphs, 4);
    }
}
#[cfg(test)]
mod split_chunk_tests {
    //! Tests for `split_into_chunks`: chunk counts and byte lengths for ASCII
    //! input, plus handling of multi-byte UTF-8 characters.

    use bt_string_utils::splitter::split_into_chunks;

    /// Chunk size limit, in bytes, used by most tests in this module.
    const CHUNK_SIZE_BYTES: usize = 30_000;

    /// Input shorter than the limit comes back as one unchanged chunk.
    #[test]
    fn test_single_chunk() {
        let input = "Hello, world!";
        let chunks = split_into_chunks(input, CHUNK_SIZE_BYTES);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0], input);
    }

    /// Input of exactly twice the limit splits into two full-size chunks.
    #[test]
    fn test_multiple_chunks() {
        let input = "a".repeat(60_000);
        let chunks = split_into_chunks(&input, CHUNK_SIZE_BYTES);
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].len(), CHUNK_SIZE_BYTES);
        assert_eq!(chunks[1].len(), CHUNK_SIZE_BYTES);
    }

    /// Four-byte emoji survive intact when the input fits in one chunk.
    #[test]
    fn test_multi_byte_characters() {
        let input = "This is a test with emoji: 🦄🚀";
        let chunks = split_into_chunks(input, CHUNK_SIZE_BYTES);
        assert_eq!(chunks.len(), 1);
        assert!(chunks[0].contains("🦄"));
        assert!(chunks[0].contains("🚀"));
    }

    /// A one-character input yields a single chunk equal to the input.
    #[test]
    fn test_single_character() {
        let input = "A";
        let chunks = split_into_chunks(input, CHUNK_SIZE_BYTES);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0], input);
    }

    /// The empty string produces no chunks at all.
    #[test]
    fn test_empty_string() {
        let input = "";
        let chunks = split_into_chunks(input, CHUNK_SIZE_BYTES);
        assert_eq!(chunks.len(), 0);
    }

    /// 100 000 bytes split into three full chunks plus a 10 000-byte remainder.
    #[test]
    fn test_large_input() {
        let input = "a".repeat(100_000);
        let chunks = split_into_chunks(&input, CHUNK_SIZE_BYTES);
        assert_eq!(chunks.len(), 4);
        assert_eq!(chunks[0].len(), CHUNK_SIZE_BYTES);
        assert_eq!(chunks[1].len(), CHUNK_SIZE_BYTES);
        assert_eq!(chunks[2].len(), CHUNK_SIZE_BYTES);
        assert_eq!(chunks[3].len(), 10_000);
    }

    /// Multi-byte characters must never be cut in half by a chunk limit.
    #[test]
    fn test_dont_split_multi_byte_characters() {
        // 字 is U+5B57, which takes three bytes in UTF-8.
        let input = "This is a test with a Chinese character: 字";
        let chunks = split_into_chunks(input, CHUNK_SIZE_BYTES);
        assert_eq!(chunks.len(), 1);
        assert!(chunks[0].contains("字"));

        // A 4-byte limit over 3-byte characters puts every limit inside a
        // character. Four bytes is also the widest UTF-8 encoding, so a chunk
        // can always hold at least one whole character. Only lossless
        // reassembly is asserted, because the direction in which the splitter
        // moves a boundary is not visible from this file.
        let straddling = "字字字";
        let small_chunks = split_into_chunks(straddling, 4);
        assert_eq!(small_chunks.concat(), straddling);
    }
}