Skip to main content

cognee_chunking/
cut_type.rs

1use serde::{Deserialize, Serialize};
2use std::fmt;
3
4/// Describes how a chunk boundary was determined.
5#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
6pub enum CutType {
7    /// Sentence ending followed by a newline (paragraph boundary).
8    ParagraphEnd,
9    /// Sentence ending punctuation without a newline.
10    SentenceEnd,
11    /// Text ended mid-sentence (no ending punctuation).
12    SentenceCut,
13    /// Text ended mid-word.
14    Word,
15    /// Single-chunk emission for DLT-ingested rows.
16    DltRow,
17    /// Row boundary within CSV/DLT chunking (mid-row split).
18    RowCut,
19    /// End of a complete row in CSV/DLT chunking.
20    RowEnd,
21}
22
23impl fmt::Display for CutType {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        match self {
26            CutType::ParagraphEnd => write!(f, "paragraph_end"),
27            CutType::SentenceEnd => write!(f, "sentence_end"),
28            CutType::SentenceCut => write!(f, "sentence_cut"),
29            CutType::Word => write!(f, "word"),
30            CutType::DltRow => write!(f, "dlt_row"),
31            CutType::RowCut => write!(f, "row_cut"),
32            CutType::RowEnd => write!(f, "row_end"),
33        }
34    }
35}
36
#[cfg(test)]
mod tests {
    use super::*;

    /// Each variant must render as its snake_case label through `Display`.
    #[test]
    fn display_formats() {
        // One row per variant: (value under test, expected rendered text).
        let cases = [
            (CutType::ParagraphEnd, "paragraph_end"),
            (CutType::SentenceEnd, "sentence_end"),
            (CutType::SentenceCut, "sentence_cut"),
            (CutType::Word, "word"),
            (CutType::DltRow, "dlt_row"),
            (CutType::RowCut, "row_cut"),
            (CutType::RowEnd, "row_end"),
        ];

        for (variant, expected) in &cases {
            assert_eq!(variant.to_string(), *expected);
        }
    }
}