/// ASCII-oriented text helpers: lowercasing and whitespace search.
pub mod ascii {
    /// Minimum byte length at which [`to_lowercase`] routes ASCII input to
    /// [`to_lowercase_optimized`] rather than [`to_lowercase_fallback`].
    ///
    /// For pure-ASCII text both routines return the same string, so this
    /// threshold only selects which routine does the work.
    const FAST_PATH_MIN_LEN: usize = 16;

    /// Lowercases the ASCII letters `A`-`Z` in `input`.
    ///
    /// Every other byte, including the bytes of non-ASCII characters, is
    /// copied through unchanged, so the result is always valid UTF-8.
    ///
    /// Delegates to [`str::to_ascii_lowercase`], which performs the same
    /// byte-wise mapping as a hand-written loop without needing `unsafe` to
    /// rebuild the `String`.
    pub fn to_lowercase_optimized(input: &str) -> String {
        input.to_ascii_lowercase()
    }

    /// Lowercases `input` using the Unicode-aware [`str::to_lowercase`].
    pub fn to_lowercase_fallback(input: &str) -> String {
        input.to_lowercase()
    }

    /// Lowercases `input`, choosing between the ASCII and Unicode routines.
    ///
    /// Input that is entirely ASCII and at least `FAST_PATH_MIN_LEN` bytes
    /// long goes through [`to_lowercase_optimized`]; everything else (short
    /// strings and anything containing non-ASCII characters) goes through
    /// [`to_lowercase_fallback`].
    pub fn to_lowercase(input: &str) -> String {
        if input.is_ascii() && input.len() >= FAST_PATH_MIN_LEN {
            to_lowercase_optimized(input)
        } else {
            to_lowercase_fallback(input)
        }
    }

    /// Returns the index of the first ASCII whitespace byte in `input`, or
    /// `None` when there is none (including when `input` is empty).
    ///
    /// Whitespace is defined by [`u8::is_ascii_whitespace`]: space, tab,
    /// line feed, form feed and carriage return. The same definition applies
    /// to every byte regardless of its position or the length of `input`, so
    /// a form feed is reported wherever it occurs.
    pub fn find_whitespace_optimized(input: &[u8]) -> Option<usize> {
        input.iter().position(u8::is_ascii_whitespace)
    }

    /// Returns the index of the first ASCII whitespace byte in `input`.
    ///
    /// Currently forwards to [`find_whitespace_optimized`]; this module does
    /// not contain a separate SIMD-specific implementation.
    pub fn find_whitespace_simd(input: &[u8]) -> Option<usize> {
        find_whitespace_optimized(input)
    }
}
/// Element-wise batch arithmetic over `f32` slices for BM25-style scoring.
pub mod numeric {
    /// Computes `tf * (k1 + 1) / (tf + k1 * norm)` for each pair of elements
    /// taken from `term_freqs` and `norm_factors`.
    ///
    /// The output has one entry per input pair, in input order. The division
    /// is not guarded: if `tf + k1 * norm` is zero the entry is whatever
    /// IEEE-754 division yields (NaN or an infinity).
    ///
    /// # Panics
    ///
    /// Panics if `term_freqs` and `norm_factors` differ in length.
    pub fn batch_bm25_tf(term_freqs: &[f32], k1: f32, norm_factors: &[f32]) -> Vec<f32> {
        assert_eq!(term_freqs.len(), norm_factors.len());

        // The numerator multiplier is the same for every element.
        let k1_plus_one = k1 + 1.0;

        term_freqs
            .iter()
            .zip(norm_factors)
            .map(|(&freq, &norm)| (freq * k1_plus_one) / (freq + k1 * norm))
            .collect()
    }

    /// Computes `idf * tf * boost` for each triple of elements taken from the
    /// three slices, returning the products in input order.
    ///
    /// # Panics
    ///
    /// Panics if `idf_scores` or `boosts` differs in length from `tf_scores`.
    pub fn batch_bm25_final_score(
        tf_scores: &[f32],
        idf_scores: &[f32],
        boosts: &[f32],
    ) -> Vec<f32> {
        assert_eq!(tf_scores.len(), idf_scores.len());
        assert_eq!(tf_scores.len(), boosts.len());

        tf_scores
            .iter()
            .zip(idf_scores)
            .zip(boosts)
            // Multiplication order (idf, then tf, then boost) is kept fixed so
            // floating-point rounding is reproducible.
            .map(|((&tf, &idf), &boost)| idf * tf * boost)
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// All-uppercase ASCII input, 50 bytes long.
    #[test]
    fn test_optimized_lowercase_ascii() {
        assert_eq!(
            ascii::to_lowercase("HELLO WORLD THIS IS A TEST STRING FOR OPTIMIZATION"),
            "hello world this is a test string for optimization"
        );
    }

    /// Mixed-case ASCII with digits; only the letters should change.
    #[test]
    fn test_optimized_lowercase_mixed() {
        assert_eq!(
            ascii::to_lowercase("Hello World 123 ABC def"),
            "hello world 123 abc def"
        );
    }

    /// The empty string maps to the empty string.
    #[test]
    fn test_optimized_lowercase_empty() {
        assert_eq!(ascii::to_lowercase(""), "");
    }

    /// A three-byte ASCII input is still lowercased correctly.
    #[test]
    fn test_optimized_lowercase_short() {
        assert_eq!(ascii::to_lowercase("ABC"), "abc");
    }

    /// Non-ASCII input must match the standard library's Unicode lowercasing.
    #[test]
    fn test_fallback_for_unicode() {
        let input = "Héllo Wörld";
        assert_eq!(ascii::to_lowercase(input), input.to_lowercase());
    }
}