/// A strategy for splitting a piece of text into chunks.
///
/// Implementors must be `Send + Sync`. Every method takes `&self` and none of
/// them is generic over a type, so the trait can be used as `dyn Chunker`
/// (for example behind a `Box<dyn Chunker>`).
pub trait Chunker: Send + Sync {
/// Returns the string identifier of this chunker.
///
/// The implementations in this file return a fixed constant here.
fn id(&self) -> &str;
/// Splits `text` into chunks and returns them in a `Vec`.
///
/// The returned slices are tied to the lifetime `'t` of `text`; they are
/// string slices, not owned copies. How many chunks come back, and whether
/// an empty chunk can appear, is decided by the implementation.
fn split<'t>(&self, text: &'t str) -> Vec<&'t str>;
}
/// A chunker that does not split: the whole input is returned as one chunk.
///
/// The type carries no data, so it is `Copy` and all values compare equal.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct IdentityChunker;
impl IdentityChunker {
    /// Identifier of this chunker; it is what [`Chunker::id`] returns for
    /// [`IdentityChunker`].
    // `'static` is implied for references in a `const`, so it is not spelled out.
    pub const ID: &str = "identity";
}
impl Chunker for IdentityChunker {
fn id(&self) -> &str {
Self::ID
}
fn split<'t>(&self, text: &'t str) -> Vec<&'t str> {
vec![text]
}
}
/// A chunker that produces one chunk per non-empty line of the input.
///
/// The type carries no data, so it is `Copy` and all values compare equal.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct LineChunker;
impl LineChunker {
    /// Identifier of this chunker; it is what [`Chunker::id`] returns for
    /// [`LineChunker`].
    // `'static` is implied for references in a `const`, so it is not spelled out.
    pub const ID: &str = "line";
}
impl Chunker for LineChunker {
fn id(&self) -> &str {
Self::ID
}
fn split<'t>(&self, text: &'t str) -> Vec<&'t str> {
text.split('\n').filter(|l| !l.is_empty()).collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- IdentityChunker ----

    // The input comes back as the one and only chunk.
    #[test]
    fn identity_returns_the_input() {
        let input = "hello world";
        assert_eq!(IdentityChunker.split(input), ["hello world"]);
    }

    // Even an empty input produces a single (empty) chunk.
    #[test]
    fn identity_empty_input_still_returns_one_chunk() {
        let produced = IdentityChunker.split("");
        assert_eq!(produced, [""]);
    }

    // The id must match both through the trait method and the constant.
    #[test]
    fn identity_id_is_stable() {
        let chunker = IdentityChunker;
        assert_eq!("identity", chunker.id());
        assert_eq!("identity", IdentityChunker::ID);
    }

    // Splitting the same input twice gives equal results.
    #[test]
    fn identity_is_deterministic() {
        let chunker = IdentityChunker;
        let inputs = ["foo", "", "multi\nline\ntext", "with unicode 日本語"];
        for input in inputs {
            let first = chunker.split(input);
            let second = chunker.split(input);
            assert_eq!(first, second);
        }
    }

    // Compiles only if `Chunker` can be used as a trait object.
    #[test]
    fn chunker_is_object_safe() {
        let dynamic: Box<dyn Chunker> = Box::new(IdentityChunker);
        assert_eq!("identity", dynamic.id());
    }

    // ---- LineChunker ----

    // One chunk per line when lines are separated by `\n`.
    #[test]
    fn line_chunker_splits_on_newlines() {
        let produced = LineChunker.split("alpha\nbeta\ngamma");
        assert_eq!(produced, ["alpha", "beta", "gamma"]);
    }

    // Blank lines, including the one after a trailing newline, are dropped.
    #[test]
    fn line_chunker_drops_empty_lines() {
        let produced = LineChunker.split("alpha\n\nbeta\n");
        assert_eq!(produced, ["alpha", "beta"]);
    }

    // Input without any newline is a single chunk.
    #[test]
    fn line_chunker_single_line_returns_one_chunk() {
        let produced = LineChunker.split("just one line");
        assert_eq!(produced, ["just one line"]);
    }

    // Empty input has no non-empty lines, hence no chunks.
    #[test]
    fn line_chunker_empty_input_returns_no_chunks() {
        assert!(LineChunker.split("").is_empty());
    }

    // The id must match both through the trait method and the constant.
    #[test]
    fn line_chunker_id_is_stable() {
        let chunker = LineChunker;
        assert_eq!("line", chunker.id());
        assert_eq!("line", LineChunker::ID);
    }
}