use serde::{Deserialize, Serialize};
use std::fmt;
/// A single sentence together with its position indices and token count.
///
/// This is a plain data holder: nothing in this file validates the fields
/// against each other, they are stored exactly as supplied to [`Sentence::new`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Sentence {
/// The sentence text (owned).
pub text: String,
/// Start position of the sentence. Presumably an offset into the source
/// text; the unit (bytes vs. chars) is not established here, so confirm with callers.
pub start_index: usize,
/// End position of the sentence. NOTE(review): whether this is inclusive or
/// exclusive is not established in this file, so confirm with callers.
pub end_index: usize,
/// Token count as supplied by the caller; the tokenizer is not visible here.
pub token_count: usize,
}
impl Sentence {
    /// Builds a `Sentence`, copying `text` into an owned `String`.
    ///
    /// `start_index`, `end_index` and `token_count` are stored as given;
    /// no validation is performed.
    pub fn new(text: &str, start_index: usize, end_index: usize, token_count: usize) -> Self {
        let text = String::from(text);
        Self {
            text,
            start_index,
            end_index,
            token_count,
        }
    }

    /// Returns the length of the sentence text in bytes (not characters).
    pub fn len(&self) -> usize {
        self.text.as_str().len()
    }

    /// Returns `true` when the sentence text has zero length.
    pub fn is_empty(&self) -> bool {
        self.text.as_str().is_empty()
    }
}
impl fmt::Display for Sentence {
    /// Renders all four fields on one line in the form
    /// `Sentence(text: …, start_index: …, end_index: …, token_count: …)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!(
            "Sentence(text: {}, start_index: {}, end_index: {}, token_count: {})",
            self.text, self.start_index, self.end_index, self.token_count
        ))
    }
}
/// A chunk of text with position indices, a token count and, optionally,
/// a list of [`Sentence`] values.
///
/// This is a plain data holder: nothing in this file validates the fields
/// against each other, they are stored exactly as supplied to
/// [`SentenceChunk::new`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SentenceChunk {
/// The chunk text (owned).
pub text: String,
/// Start position of the chunk. Presumably an offset into the source
/// text; the unit (bytes vs. chars) is not established here, so confirm with callers.
pub start_index: usize,
/// End position of the chunk. NOTE(review): whether this is inclusive or
/// exclusive is not established in this file, so confirm with callers.
pub end_index: usize,
/// Token count as supplied by the caller; the tokenizer is not visible here.
pub token_count: usize,
/// Optional sentence list; `None` when the caller supplies none.
/// Presumably the sentences that make up `text`; verify against the producer.
pub sentences: Option<Vec<Sentence>>,
}
impl SentenceChunk {
    /// Builds a `SentenceChunk`, copying `text` into an owned `String`.
    ///
    /// `start_index`, `end_index`, `token_count` and `sentences` are stored
    /// as given; no validation is performed.
    pub fn new(
        text: &str,
        start_index: usize,
        end_index: usize,
        token_count: usize,
        sentences: Option<Vec<Sentence>>,
    ) -> Self {
        Self {
            text: text.to_string(),
            start_index,
            end_index,
            token_count,
            sentences,
        }
    }

    /// Returns an independent copy of this chunk, including its sentence list.
    ///
    /// Kept as a named method for existing callers; it is equivalent to
    /// [`Clone::clone`].
    pub fn copy(&self) -> Self {
        // Delegate to the derived `Clone` so a newly added field can never be
        // forgotten here, as it could be with a hand-written field-by-field copy.
        self.clone()
    }

    /// Returns the length of the chunk text in bytes (not characters).
    pub fn len(&self) -> usize {
        self.text.len()
    }

    /// Returns `true` when the chunk text has zero length.
    pub fn is_empty(&self) -> bool {
        self.text.is_empty()
    }
}
impl fmt::Display for SentenceChunk {
    /// Renders all fields on one line in the form `SentenceChunk(text: …, …)`.
    ///
    /// `sentences` is printed with its `Debug` representation because
    /// `Option<Vec<Sentence>>` has no `Display` implementation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!(
            "SentenceChunk(text: {}, start_index: {}, end_index: {}, token_count: {}, sentences: {:?})",
            self.text, self.start_index, self.end_index, self.token_count, self.sentences
        ))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- Sentence -----------------------------------------------------------

    /// The constructor stores every argument unchanged.
    #[test]
    fn test_sentence_new() {
        let sentence = Sentence::new("Hello, world!", 0, 12, 2);
        assert_eq!(sentence.text, "Hello, world!");
        assert_eq!(sentence.start_index, 0);
        assert_eq!(sentence.end_index, 12);
        assert_eq!(sentence.token_count, 2);
    }

    /// `len` reports the byte length of the text.
    #[test]
    fn test_sentence_len() {
        let sentence = Sentence::new("Hello, world!", 0, 12, 2);
        assert_eq!(sentence.len(), 13);
    }

    /// `is_empty` is true only for zero-length text.
    #[test]
    fn test_sentence_is_empty() {
        assert!(Sentence::new("", 0, 0, 0).is_empty());
        assert!(!Sentence::new("Hello, world!", 0, 12, 2).is_empty());
    }

    /// `Display` prints every field in declaration order.
    #[test]
    fn test_sentence_display() {
        let sentence = Sentence::new("Hello, world!", 0, 12, 2);
        assert_eq!(
            format!("{}", sentence),
            "Sentence(text: Hello, world!, start_index: 0, end_index: 12, token_count: 2)"
        );
    }

    // --- SentenceChunk ------------------------------------------------------

    /// The constructor stores every argument unchanged, including `None`.
    #[test]
    fn test_sentence_chunk_new() {
        let sentence_chunk = SentenceChunk::new("Hello, world!", 0, 12, 2, None);
        assert_eq!(sentence_chunk.text, "Hello, world!");
        assert_eq!(sentence_chunk.start_index, 0);
        assert_eq!(sentence_chunk.end_index, 12);
        assert_eq!(sentence_chunk.token_count, 2);
        assert!(sentence_chunk.sentences.is_none());
    }

    /// A supplied sentence list is kept as-is.
    #[test]
    fn test_sentence_chunk_new_with_sentences() {
        let sentences = vec![Sentence::new("Hello, world!", 0, 12, 2)];
        let sentence_chunk = SentenceChunk::new("Hello, world!", 0, 12, 2, Some(sentences));
        let stored = sentence_chunk
            .sentences
            .as_ref()
            .expect("sentences were passed as Some");
        assert_eq!(stored.len(), 1);
        assert_eq!(stored[0].text, "Hello, world!");
        assert_eq!(stored[0].token_count, 2);
    }

    /// `len` and `is_empty` mirror the underlying text.
    #[test]
    fn test_sentence_chunk_len_and_is_empty() {
        let sentence_chunk = SentenceChunk::new("Hello, world!", 0, 12, 2, None);
        assert_eq!(sentence_chunk.len(), 13);
        assert!(!sentence_chunk.is_empty());
        assert!(SentenceChunk::new("", 0, 0, 0, None).is_empty());
    }

    /// `copy` reproduces every field, including the nested sentences.
    #[test]
    fn test_sentence_chunk_copy() {
        let sentences = vec![Sentence::new("Hello, world!", 0, 12, 2)];
        let original = SentenceChunk::new("Hello, world!", 0, 12, 2, Some(sentences));
        let copied = original.copy();
        assert_eq!(copied.text, original.text);
        assert_eq!(copied.start_index, original.start_index);
        assert_eq!(copied.end_index, original.end_index);
        assert_eq!(copied.token_count, original.token_count);
        let copied_sentences = copied.sentences.as_ref().expect("copy keeps the sentences");
        assert_eq!(copied_sentences.len(), 1);
        assert_eq!(copied_sentences[0].text, "Hello, world!");
    }

    /// `Display` prints every field; `sentences` uses its `Debug` form.
    #[test]
    fn test_sentence_chunk_display() {
        let sentence_chunk = SentenceChunk::new("Hello, world!", 0, 12, 2, None);
        assert_eq!(
            format!("{}", sentence_chunk),
            "SentenceChunk(text: Hello, world!, start_index: 0, end_index: 12, token_count: 2, sentences: None)"
        );
    }

    /// JSON round-trip preserves all fields (only built with the `json` feature).
    #[test]
    #[cfg(feature = "json")]
    fn test_sentence_chunk_serde() {
        let sentence_chunk = SentenceChunk::new("Hello, world!", 0, 12, 2, None);
        let serialized = serde_json::to_string(&sentence_chunk).unwrap();
        let deserialized: SentenceChunk = serde_json::from_str(&serialized).unwrap();
        assert_eq!(deserialized.text, "Hello, world!");
        assert_eq!(deserialized.start_index, 0);
        assert_eq!(deserialized.end_index, 12);
        assert_eq!(deserialized.token_count, 2);
        assert!(deserialized.sentences.is_none());
    }
}