use serde::{Deserialize, Serialize};
use std::num::NonZeroU32;
use std::path::PathBuf;
/// Identifier of a document chunk, stored as a non-zero `u32`.
///
/// Zero is not representable: `ChunkId::from_u32` and `ChunkId::from_bytes`
/// return `None` for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ChunkId(NonZeroU32);
impl ChunkId {
pub fn new(value: NonZeroU32) -> Self {
Self(value)
}
pub fn from_u32(value: u32) -> Option<Self> {
NonZeroU32::new(value).map(Self)
}
pub fn value(&self) -> u32 {
self.0.get()
}
pub fn get(&self) -> u32 {
self.0.get()
}
pub fn to_bytes(&self) -> [u8; 4] {
self.0.get().to_le_bytes()
}
pub fn from_bytes(bytes: [u8; 4]) -> Option<Self> {
let value = u32::from_le_bytes(bytes);
Self::from_u32(value)
}
}
/// Identifier of a collection, stored as a non-zero `u32`.
///
/// Zero is not representable: `CollectionId::from_u32` returns `None` for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CollectionId(NonZeroU32);
impl CollectionId {
pub fn new(value: NonZeroU32) -> Self {
Self(value)
}
pub fn from_u32(value: u32) -> Option<Self> {
NonZeroU32::new(value).map(Self)
}
pub fn value(&self) -> u32 {
self.0.get()
}
pub fn get(&self) -> u32 {
self.0.get()
}
}
/// A piece of document text together with the metadata stored alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentChunk {
/// Identifier of this chunk.
pub id: ChunkId,
/// Identifier of the collection associated with this chunk.
pub collection_id: CollectionId,
/// Path recorded for the chunk.
// NOTE(review): presumably the file the chunk was extracted from — confirm.
pub source_path: PathBuf,
/// Pair of byte offsets.
// NOTE(review): presumably (start, end) within the file at `source_path`;
// whether `end` is inclusive or exclusive is not shown here — confirm.
pub byte_range: (usize, usize),
/// Heading strings recorded for the chunk.
// NOTE(review): presumably the enclosing headings, outermost first — confirm.
pub heading_context: Vec<String>,
/// Text of the chunk; `preview` and `char_count` operate on this field.
pub content: String,
}
impl DocumentChunk {
    /// Assembles a chunk from its parts; every argument is stored unchanged
    /// in the field of the same name.
    pub fn new(
        id: ChunkId,
        collection_id: CollectionId,
        source_path: PathBuf,
        byte_range: (usize, usize),
        heading_context: Vec<String>,
        content: String,
    ) -> Self {
        Self {
            id,
            collection_id,
            source_path,
            byte_range,
            heading_context,
            content,
        }
    }

    /// Returns the leading part of `content`, at most `max_chars` characters
    /// (Unicode scalar values) long.
    ///
    /// The whole content is returned when it has `max_chars` characters or
    /// fewer. The returned slice always ends on a character boundary, so this
    /// never panics on multi-byte text. The limit is counted in characters,
    /// consistent with [`DocumentChunk::char_count`], not in bytes.
    pub fn preview(&self, max_chars: usize) -> &str {
        // A string never has more characters than bytes, so a byte length
        // within the limit means the content fits without scanning it.
        if self.content.len() <= max_chars {
            return &self.content;
        }
        // `nth(max_chars)` is the first character that does NOT fit; its byte
        // offset is where the preview ends. `None` means every character fits.
        match self.content.char_indices().nth(max_chars) {
            Some((cut, _)) => &self.content[..cut],
            None => &self.content,
        }
    }

    /// Returns the number of characters (Unicode scalar values) in `content`.
    pub fn char_count(&self) -> usize {
        self.content.chars().count()
    }
}
/// Bookkeeping stored for a file: its content hash, the chunk ids recorded
/// for it, and two timestamps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileState {
/// Path of the file this record describes.
pub path: PathBuf,
/// Collection name; deserializes to an empty string when the field is
/// absent from the input.
#[serde(default)]
pub collection: String,
/// Hash string that `has_changed` compares against a new hash.
// NOTE(review): the hash algorithm and encoding are not visible here.
pub content_hash: String,
/// Chunk ids recorded for this file.
pub chunk_ids: Vec<ChunkId>,
/// Seconds since the Unix epoch, taken from the system clock when the record
/// is built by `FileState::new` (0 if the clock reads before the epoch).
pub last_indexed: u64,
/// Deserializes to 0 when the field is absent from the input.
// NOTE(review): presumably the file's modification time; the unit is not
// visible here — confirm against the caller.
#[serde(default)]
pub mtime: u64,
}
impl FileState {
pub fn new(
path: PathBuf,
collection: String,
content_hash: String,
chunk_ids: Vec<ChunkId>,
mtime: u64,
) -> Self {
Self {
path,
collection,
content_hash,
chunk_ids,
last_indexed: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0),
mtime,
}
}
pub fn has_changed(&self, new_hash: &str) -> bool {
self.content_hash != new_hash
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Text shared by the preview assertions.
    const SAMPLE_TEXT: &str = "Hello, world! This is a test.";

    /// Shorthand for building a `ChunkId` from a literal known to be non-zero.
    fn chunk_id(raw: u32) -> ChunkId {
        ChunkId::from_u32(raw).unwrap()
    }

    /// An id survives encoding to bytes and decoding back.
    #[test]
    fn test_chunk_id_roundtrip() {
        let original = chunk_id(42);
        let decoded = ChunkId::from_bytes(original.to_bytes()).unwrap();
        assert_eq!(original, decoded);
    }

    /// Zero is rejected by the fallible constructor.
    #[test]
    fn test_chunk_id_zero_returns_none() {
        let rejected = ChunkId::from_u32(0);
        assert!(rejected.is_none());
    }

    /// `preview` truncates long content and returns short content whole.
    #[test]
    fn test_document_chunk_preview() {
        let collection = CollectionId::from_u32(1).unwrap();
        let headings = vec![String::from("Chapter 1")];
        let chunk = DocumentChunk::new(
            chunk_id(1),
            collection,
            PathBuf::from("test.md"),
            (0, 100),
            headings,
            String::from(SAMPLE_TEXT),
        );

        assert_eq!(chunk.preview(5), "Hello");
        assert_eq!(chunk.preview(100), "Hello, world! This is a test.");
    }

    /// `has_changed` is false for the stored hash and true for any other.
    #[test]
    fn test_file_state_change_detection() {
        let ids = vec![chunk_id(1)];
        let state = FileState::new(
            PathBuf::from("test.md"),
            String::from("docs"),
            String::from("abc123"),
            ids,
            1700000000,
        );

        assert!(!state.has_changed("abc123"));
        assert!(state.has_changed("def456"));
    }
}