julienne 0.1.0

Range-preserving Rust text chunkers for retrieval and embedding pipelines
Documentation
use std::fmt;

/// Error type enumerating the failure cases reported by this crate.
///
/// Every variant carries owned data, so values can be cloned and compared
/// for equality (see the derives below). The human-readable text for each
/// variant is produced by the `Display` implementation in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChunkError {
    /// A language was requested that is not supported; `language` is the
    /// value echoed back in the error message.
    UnsupportedLanguage { language: String },
    /// Parsing failed; `message` holds the detail text.
    ParseFailure { message: String },
    /// A semantic unit measured `measured`, which exceeds the configured
    /// `limit`. The unit of measurement is not specified by this type.
    OversizedSemanticUnit { measured: usize, limit: usize },
    /// A sizer reported a failure; `message` holds the detail text.
    SizerFailure { message: String },
    /// Embedding failed; `message` holds the detail text.
    EmbeddingFailure { message: String },
    /// The supplied configuration is invalid; `message` explains why.
    InvalidConfiguration { message: String },
}

impl ChunkError {
    pub fn invalid_configuration(message: impl Into<String>) -> Self {
        Self::InvalidConfiguration {
            message: message.into(),
        }
    }

    pub fn embedding_failure(message: impl Into<String>) -> Self {
        Self::EmbeddingFailure {
            message: message.into(),
        }
    }

    pub fn unsupported_language(language: impl Into<String>) -> Self {
        Self::UnsupportedLanguage {
            language: language.into(),
        }
    }
}

impl fmt::Display for ChunkError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::UnsupportedLanguage { language } => {
                write!(f, "unsupported language: {language}")
            }
            Self::ParseFailure { message } => write!(f, "parse failure: {message}"),
            Self::OversizedSemanticUnit { measured, limit } => write!(
                f,
                "semantic unit size {measured} exceeds configured limit {limit}"
            ),
            Self::SizerFailure { message } => write!(f, "sizer failure: {message}"),
            Self::EmbeddingFailure { message } => write!(f, "embedding failure: {message}"),
            Self::InvalidConfiguration { message } => {
                write!(f, "invalid configuration: {message}")
            }
        }
    }
}

// Opts `ChunkError` into the standard error trait so it can be used where a
// `std::error::Error` is expected (e.g. boxed as `Box<dyn Error>`). No methods
// are overridden, so the trait's default implementations apply.
impl std::error::Error for ChunkError {}