use std::{ops::Range, fmt::Display};
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// Strategy interface for splitting a source text into chunks.
///
/// The supertrait bounds require every implementor to be `Send + Sync`.
pub trait Chunker: Send + Sync {
/// Splits `source_text` and returns one [`ChunkData`] per produced chunk.
///
/// NOTE(review): the returned `text_range` values are presumably offsets
/// into `source_text`, since [`Chunk::text`] later slices a source text
/// with them — confirm against the concrete implementations.
///
/// # Errors
///
/// Returns a [`ChunkerError`] when the implementation cannot produce chunks.
fn chunk(&self, source_text: &str) -> Result<Vec<ChunkData>, ChunkerError>;
}
/// Owned, serializable description of a single chunk.
///
/// The chunk's text itself is not stored here; only the range locating it.
/// Use [`ChunkData::to_chunk`] to pair this value with a source text and
/// read the text back.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ChunkData {
/// Range used by [`Chunk::text`] to slice the source text. Because that is
/// `str` slicing, the bounds are byte offsets.
pub text_range: Range<usize>,
/// Optional heading label; when present, the `Display` impl of [`Chunk`]
/// prints it before the chunk text.
/// NOTE(review): presumably the path of headings enclosing the chunk — the
/// exact format is not visible here.
pub heading_path: Option<String>,
/// Optional token count.
/// NOTE(review): presumably the number of tokens in the chunk text; which
/// tokenizer produces it is not visible here.
pub token_count: Option<usize>,
}
impl ChunkData {
    /// Builds a borrowed [`Chunk`] view that pairs this chunk description
    /// with `source_text`.
    ///
    /// No text is copied and no validation is performed here: the view simply
    /// holds both references, so it cannot outlive either of them.
    pub fn to_chunk<'a>(&'a self, source_text: &'a str) -> Chunk<'a> {
        let data = self;
        Chunk { data, source_text }
    }
}
/// Errors returned by [`Chunker::chunk`].
///
/// Each variant carries a free-form message that is appended to the
/// variant's fixed prefix when the error is displayed.
#[derive(Debug, Error)]
pub enum ChunkerError {
/// Failure while processing; displayed as
/// `Chunker processing failed: <message>`.
#[error("Chunker processing failed: {0}")]
Processing(String),
/// Invalid configuration; displayed as
/// `Invalid chunker configuration: <message>`.
#[error("Invalid chunker configuration: {0}")]
Configuration(String),
}
/// Borrowed view combining a [`ChunkData`] with the text it is sliced from.
///
/// Both fields are references with the same lifetime `'a`, so a `Chunk` owns
/// no data of its own. Values are created by [`ChunkData::to_chunk`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Chunk<'a> {
/// The chunk description (range, heading path, token count).
pub(crate) data: &'a ChunkData,
/// The text that `data.text_range` is applied to in [`Chunk::text`].
pub(crate) source_text: &'a str,
}
impl<'a> Chunk<'a> {
pub fn text(&self) -> &'a str {
&self.source_text[self.data.text_range.clone()]
}
pub fn heading_path(&self) -> Option<&'a str> {
self.data.heading_path.as_deref()
}
}
impl<'a> AsRef<ChunkData> for Chunk<'a> {
    /// Gives access to the [`ChunkData`] this view wraps.
    fn as_ref(&self) -> &ChunkData {
        let Self { data, .. } = self;
        *data
    }
}
impl<'a> Display for Chunk<'a> {
    /// Writes the chunk text, preceded by the heading path, a colon and a
    /// blank line when a heading path is present.
    ///
    /// Because this calls [`Chunk::text`], it panics under the same
    /// conditions as that method.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.heading_path() {
            Some(heading) => write!(f, "{}:\n\n{}", heading, self.text()),
            None => write!(f, "{}", self.text()),
        }
    }
}