use crate::checkpoint::{Checkpoint, CheckpointMetadataBuilder};
use crate::compression::{CompressionEngine, CompressionStrategy};
use crate::error::{Result, TitorError};
use crate::file_tracking::{FileTracker, create_manifest, create_file_map};
use crate::merkle::{MerkleTree, FileEntryHashBuilder};
use crate::storage::Storage;
use crate::timeline::Timeline;
use crate::types::*;
use crate::utils;
use crate::verification::{CheckpointVerifier, TimelineVerificationReport, VerificationReport};
use parking_lot::{Mutex, RwLock};
use rayon::prelude::*;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Instant;
use tracing::{debug, info, instrument, trace, warn};
use serde_json;
/// Core checkpointing engine: tracks a directory tree (`root_path`) and
/// stores content-addressed snapshots of it through `storage`.
pub struct Titor {
    /// Directory whose contents are checkpointed.
    root_path: PathBuf,
    /// Object/checkpoint store; shared with parallel workers during checkpointing.
    storage: Arc<Storage>,
    /// In-memory checkpoint graph, guarded for concurrent readers.
    timeline: Arc<RwLock<Timeline>>,
    /// Effective configuration (also persisted in storage metadata).
    config: TitorConfig,
    /// Strategy controlling automatic checkpoint creation.
    auto_checkpoint_strategy: Arc<Mutex<AutoCheckpointStrategy>>,
    /// User-registered lifecycle hooks (pre/post checkpoint and restore).
    hooks: Arc<Mutex<Vec<Box<dyn CheckpointHook>>>>,
    /// Scanner used to enumerate files under `root_path`.
    file_tracker: FileTracker,
}
impl std::fmt::Debug for Titor {
    /// Manual `Debug` impl: `hooks` holds trait objects, so it is summarized
    /// by count instead of being formatted field-by-field.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let hook_count = self.hooks.lock().len();
        f.debug_struct("Titor")
            .field("root_path", &self.root_path)
            .field("storage", &self.storage)
            .field("timeline", &self.timeline)
            .field("config", &self.config)
            .field("auto_checkpoint_strategy", &self.auto_checkpoint_strategy)
            .field("hooks", &format!("<{} hooks>", hook_count))
            .field("file_tracker", &self.file_tracker)
            .finish()
    }
}
impl Titor {
/// Initialize a brand-new Titor store for `root_path` at `storage_path`.
///
/// # Errors
/// Fails if `root_path` does not exist or storage initialization fails.
#[instrument(skip(storage_path))]
pub fn init(root_path: PathBuf, storage_path: PathBuf) -> Result<Self> {
    info!("Initializing Titor for {:?}", root_path);
    if !root_path.exists() {
        return Err(TitorError::internal(format!(
            "Root path {:?} does not exist",
            root_path
        )));
    }
    // Default configuration: unlimited file size, fast compression,
    // one worker per logical CPU, no ignore patterns.
    let config = TitorConfig {
        root_path: root_path.clone(),
        storage_path: storage_path.clone(),
        max_file_size: 0,
        parallel_workers: num_cpus::get(),
        ignore_patterns: vec![],
        compression_strategy: "fast".to_string(),
        follow_symlinks: false,
        version: env!("CARGO_PKG_VERSION").to_string(),
    };
    let engine = CompressionEngine::new(CompressionStrategy::Fast);
    let storage = Storage::init_or_open(storage_path, config.clone(), engine)?;
    let tracker = FileTracker::new(root_path.clone());
    // Best effort: keep the storage directory out of version control.
    match utils::ensure_gitignore_has_entry(&root_path.join(".gitignore"), ".titor") {
        Ok(_) => debug!("Ensured .titor is in .gitignore"),
        Err(e) => warn!("Failed to update .gitignore: {}", e),
    }
    Ok(Self {
        root_path,
        storage: Arc::new(storage),
        timeline: Arc::new(RwLock::new(Timeline::new())),
        config,
        auto_checkpoint_strategy: Arc::new(Mutex::new(AutoCheckpointStrategy::Disabled)),
        hooks: Arc::new(Mutex::new(Vec::new())),
        file_tracker: tracker,
    })
}
/// Open an existing Titor store, restoring configuration and timeline state
/// from the persisted metadata.
#[instrument(skip(storage_path))]
pub fn open(root_path: PathBuf, storage_path: PathBuf) -> Result<Self> {
    info!("Opening Titor storage at {:?}", storage_path);
    let engine = CompressionEngine::new(CompressionStrategy::Fast);
    let storage = Storage::open(storage_path, engine)?;
    // The persisted configuration is authoritative when reopening.
    let config = storage.metadata().read().config.clone();
    let file_tracker = FileTracker::new(root_path.clone())
        .with_ignore_patterns(config.ignore_patterns.clone())
        .with_max_file_size(config.max_file_size)
        .with_follow_symlinks(config.follow_symlinks)
        .with_parallel_workers(config.parallel_workers);
    let timeline = Self::load_timeline(&storage)?;
    // Best effort: keep the storage directory out of version control.
    match utils::ensure_gitignore_has_entry(&root_path.join(".gitignore"), ".titor") {
        Ok(_) => debug!("Ensured .titor is in .gitignore"),
        Err(e) => warn!("Failed to update .gitignore: {}", e),
    }
    Ok(Self {
        root_path,
        storage: Arc::new(storage),
        timeline: Arc::new(RwLock::new(timeline)),
        config,
        auto_checkpoint_strategy: Arc::new(Mutex::new(AutoCheckpointStrategy::Disabled)),
        hooks: Arc::new(Mutex::new(Vec::new())),
        file_tracker,
    })
}
/// Create a new checkpoint of the current state of `root_path`.
///
/// Scans the working tree, stores every entry's content (compressed) in the
/// object store in parallel, builds a Merkle tree over the entries, records
/// change statistics relative to the parent checkpoint, and advances the
/// persisted timeline.
///
/// # Errors
/// Returns any error from scanning, object storage, manifest/timeline
/// persistence, or from a registered hook.
#[instrument(skip(self))]
pub fn checkpoint(&mut self, description: Option<String>) -> Result<Checkpoint> {
    info!("Creating checkpoint: {:?}", description);
    let start = Instant::now();
    // The new checkpoint's parent is whatever the timeline currently points at.
    let parent_id = self.timeline.read().current_checkpoint_id.clone();
    debug!("Scanning directory for changes");
    let mut file_entries = self.file_tracker.scan_directory(Some(|info: ProgressInfo| {
        trace!("Scanned: {:?}", info.current_item);
    }))?;
    debug!("Storing {} files", file_entries.len());
    // Clone the Arc and root path so the rayon closure does not borrow &self.
    let storage = Arc::clone(&self.storage);
    let root_path = self.root_path.clone();
    // Store all entries in parallel; each task reports its compressed size.
    let processing_results: Vec<Result<(usize, u64)>> = file_entries
        .par_iter_mut()
        .enumerate()
        .map(|(idx, entry)| -> Result<(usize, u64)> {
            let file_path = root_path.join(&entry.path);
            let mut compressed_size = 0u64;
            if entry.is_directory {
                // Directories carry no content; just make sure they exist.
                if !file_path.exists() {
                    fs::create_dir_all(&file_path)?;
                }
                entry.size = 0;
            } else if entry.is_symlink {
                // Symlinks are stored as the bytes of their target path.
                if let Some(target) = &entry.symlink_target {
                    let content_str = target.to_string_lossy();
                    let content = content_str.as_bytes();
                    trace!("Storing symlink {:?} -> {:?}", entry.path, target);
                    let (_, comp_size) = storage.store_object(content, &entry.path)?;
                    entry.size = content.len() as u64;
                    compressed_size = comp_size;
                }
            } else if file_path.exists() {
                let content = fs::read(&file_path)?;
                // If the file changed between scan and read, re-hash so the
                // manifest matches the bytes that were actually stored.
                if content.len() as u64 != entry.size {
                    let actual_hash = utils::hash_data(&content);
                    entry.content_hash = actual_hash.clone();
                    let mut builder = FileEntryHashBuilder::new();
                    entry.combined_hash = builder.combined_hash(&entry.content_hash, &entry.metadata_hash);
                    entry.size = content.len() as u64;
                }
                let (_, comp_size) = storage.store_object(&content, &entry.path)?;
                compressed_size = comp_size;
            }
            Ok((idx, compressed_size))
        })
        .collect();
    // Propagate the first storage error and total up compressed bytes.
    let mut compressed_size = 0u64;
    for result in processing_results {
        let (_, comp_size) = result?;
        compressed_size += comp_size;
    }
    let total_size: u64 = file_entries.iter().map(|e| e.size).sum();
    debug!("Building Merkle tree for {} files (post-storage)", file_entries.len());
    let merkle_tree = MerkleTree::from_entries(&file_entries)?;
    let merkle_root = merkle_tree.root_hash().unwrap_or_default();
    // Change stats: diff against the parent manifest, or count everything
    // as added when this is the first checkpoint.
    let change_stats = if let Some(parent_id) = &parent_id {
        let parent_manifest = self.storage.load_manifest(parent_id)?;
        self.file_tracker.detect_changes(&parent_manifest)?
    } else {
        ChangeStats {
            files_added: file_entries.len(),
            bytes_added: file_entries.iter().map(|e| e.size).sum(),
            ..Default::default()
        }
    };
    // NOTE(review): pre-checkpoint hooks run after objects are already stored;
    // a hook error aborts the checkpoint record but not the object writes —
    // confirm this ordering is intended.
    for hook in self.hooks.lock().iter() {
        hook.pre_checkpoint(&change_stats)?;
    }
    // NOTE(review): net_size_change() looks signed; `as u64` wraps if it is
    // negative — verify callers treat bytes_changed as a magnitude.
    let metadata = CheckpointMetadataBuilder::new()
        .file_count(file_entries.len())
        .total_size(total_size)
        .compressed_size(compressed_size)
        .files_changed(change_stats.total_operations())
        .bytes_changed(change_stats.net_size_change() as u64)
        .build();
    let checkpoint = Checkpoint::new(
        parent_id,
        description,
        metadata,
        merkle_root.clone(),
    );
    self.storage.store_checkpoint(&checkpoint)?;
    let manifest = create_manifest(
        checkpoint.id.clone(),
        file_entries,
        merkle_root,
    );
    self.storage.store_manifest(&manifest)?;
    self.storage.flush_ref_counts()?;
    // Advance the timeline under a short-lived write lock, then persist it.
    {
        let mut timeline = self.timeline.write();
        timeline.add_checkpoint(checkpoint.clone())?;
        timeline.set_current(&checkpoint.id)?;
    }
    self.save_timeline()?;
    for hook in self.hooks.lock().iter() {
        hook.post_checkpoint(&checkpoint)?;
    }
    let duration = start.elapsed();
    info!(
        "Created checkpoint {} in {:?} ({} files, {} bytes)",
        checkpoint.short_id(),
        duration,
        manifest.file_count,
        utils::format_bytes(total_size)
    );
    Ok(checkpoint)
}
#[instrument(skip(self))]
pub fn restore(&mut self, checkpoint_id: &str) -> Result<RestoreResult> {
info!("Restoring to checkpoint {}", &checkpoint_id[..8.min(checkpoint_id.len())]);
let start = Instant::now();
let checkpoint = self.storage.load_checkpoint(checkpoint_id)?;
let manifest = self.storage.load_manifest(checkpoint_id)?;
let current_checkpoint = self.timeline.read()
.current_checkpoint()
.cloned();
if let Some(current) = ¤t_checkpoint {
for hook in self.hooks.lock().iter() {
hook.pre_restore(current, &checkpoint)?;
}
}
let mut files_restored = 0;
let mut files_deleted = 0;
let mut bytes_written = 0u64;
let mut bytes_deleted = 0u64;
let mut warnings = Vec::new();
let target_files = create_file_map(&manifest.files);
let current_files = self.file_tracker.scan_directory::<fn(ProgressInfo)>(None)?;
let mut directories_to_check = std::collections::HashSet::new();
for current_file in ¤t_files {
if !target_files.contains_key(current_file.path.as_path()) {
let file_path = self.root_path.join(¤t_file.path);
if file_path.exists() {
if let Some(parent) = file_path.parent() {
let mut parent = parent.to_path_buf();
while parent != self.root_path && parent.starts_with(&self.root_path) {
directories_to_check.insert(parent.clone());
if let Some(p) = parent.parent() {
parent = p.to_path_buf();
} else {
break;
}
}
}
if current_file.is_directory {
if let Err(e) = utils::remove_dir_if_empty(&file_path) {
trace!("Could not remove directory {:?}: {}", file_path, e);
}
} else {
match fs::remove_file(&file_path) {
Ok(_) => {
files_deleted += 1;
bytes_deleted += current_file.size;
trace!("Deleted file: {:?}", current_file.path);
}
Err(e) => {
warnings.push(format!(
"Failed to delete {:?}: {}",
current_file.path, e
));
}
}
}
}
}
}
let mut dirs_to_check: Vec<_> = directories_to_check.into_iter().collect();
dirs_to_check.sort_by(|a, b| b.components().count().cmp(&a.components().count()));
for dir in dirs_to_check {
if dir.exists() && dir != self.root_path {
if let Err(e) = utils::remove_dir_if_empty(&dir) {
trace!("Could not remove directory {:?}: {}", dir, e);
}
}
}
debug!("Restoring {} files", manifest.files.len());
for entry in &manifest.files {
let file_path = self.root_path.join(&entry.path);
if let Some(parent) = file_path.parent() {
fs::create_dir_all(parent)?;
}
if entry.is_directory {
if !file_path.exists() {
fs::create_dir_all(&file_path)?;
utils::set_permissions(&file_path, entry.permissions)?;
files_restored += 1;
}
} else if entry.is_symlink {
if let Some(target) = &entry.symlink_target {
if file_path.exists() || file_path.symlink_metadata().is_ok() {
trace!("Removing existing file/symlink at {:?}", file_path);
fs::remove_file(&file_path).ok();
}
let final_target = if target.is_relative() {
target.clone()
} else {
if !target.exists() {
warnings.push(format!(
"Symlink target {:?} is absolute and does not exist",
target
));
}
target.clone()
};
trace!("Creating symlink {:?} -> {:?}", file_path, final_target);
match utils::create_symlink(&final_target, &file_path) {
Ok(_) => {
files_restored += 1;
trace!("Successfully created symlink");
}
Err(e) => {
warnings.push(format!(
"Failed to create symlink {:?} -> {:?}: {}",
entry.path, final_target, e
));
}
}
}
} else {
match self.storage.load_object(&entry.content_hash) {
Ok(content) => {
fs::write(&file_path, &content)?;
utils::set_permissions(&file_path, entry.permissions)?;
files_restored += 1;
bytes_written += content.len() as u64;
}
Err(e) => {
warnings.push(format!(
"Failed to restore {:?}: {}",
entry.path, e
));
}
}
}
}
self.timeline.write().set_current(checkpoint_id)?;
self.save_timeline()?;
let result = RestoreResult {
checkpoint_id: checkpoint_id.to_string(),
files_restored,
files_deleted,
bytes_written,
bytes_deleted,
duration_ms: start.elapsed().as_millis() as u64,
warnings,
};
for hook in self.hooks.lock().iter() {
hook.post_restore(&result)?;
}
info!(
"Restored to checkpoint {} in {}ms ({} files restored, {} deleted)",
&checkpoint_id[..8.min(checkpoint_id.len())],
result.duration_ms,
result.files_restored,
result.files_deleted
);
Ok(result)
}
/// Return every checkpoint known to the timeline, oldest first.
pub fn list_checkpoints(&self) -> Result<Vec<Checkpoint>> {
    // Copy out under the read lock, then sort without holding it.
    let mut checkpoints: Vec<Checkpoint> = {
        let timeline = self.timeline.read();
        timeline.checkpoints.values().cloned().collect()
    };
    checkpoints.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
    Ok(checkpoints)
}
/// Return a snapshot (clone) of the in-memory timeline.
pub fn get_timeline(&self) -> Result<Timeline> {
    let guard = self.timeline.read();
    Ok(guard.clone())
}
/// Restore to `checkpoint_id` and immediately snapshot the result as a new
/// child checkpoint, creating a fork point in the timeline.
#[instrument(skip(self))]
pub fn fork(&mut self, checkpoint_id: &str, description: Option<String>) -> Result<Checkpoint> {
    info!("Forking from checkpoint {}", &checkpoint_id[..8.min(checkpoint_id.len())]);
    self.restore(checkpoint_id)?;
    // Fall back to an auto-generated label when no description was given.
    let label = description.unwrap_or_else(|| {
        format!("Fork from {}", &checkpoint_id[..8.min(checkpoint_id.len())])
    });
    self.checkpoint(Some(label))
}
/// Compute the file-level diff between two checkpoints (added, modified,
/// and deleted entries, plus aggregate change statistics).
pub fn diff(&self, from_id: &str, to_id: &str) -> Result<CheckpointDiff> {
    debug!(
        "Computing diff between {} and {}",
        &from_id[..8.min(from_id.len())],
        &to_id[..8.min(to_id.len())]
    );
    let source_manifest = self.storage.load_manifest(from_id)?;
    let target_manifest = self.storage.load_manifest(to_id)?;
    let source_map = create_file_map(&source_manifest.files);
    let target_map = create_file_map(&target_manifest.files);
    let mut added_files = Vec::new();
    let mut modified_files = Vec::new();
    let mut deleted_files = Vec::new();
    let mut stats = ChangeStats::default();
    // Pass 1: each target entry is either new or (if the hash moved) modified.
    for (path, target_entry) in &target_map {
        if let Some(source_entry) = source_map.get(path) {
            if target_entry.content_hash != source_entry.content_hash {
                modified_files.push(((*source_entry).clone(), (*target_entry).clone()));
                stats.files_modified += 1;
                stats.bytes_modified += target_entry.size;
                stats.changed_files.push((*path).to_path_buf());
            }
        } else {
            added_files.push((*target_entry).clone());
            stats.files_added += 1;
            stats.bytes_added += target_entry.size;
            stats.changed_files.push((*path).to_path_buf());
        }
    }
    // Pass 2: anything present only in the source was deleted.
    for (path, source_entry) in &source_map {
        if !target_map.contains_key(path) {
            deleted_files.push((*source_entry).clone());
            stats.files_deleted += 1;
            stats.bytes_deleted += source_entry.size;
            stats.changed_files.push((*path).to_path_buf());
        }
    }
    Ok(CheckpointDiff {
        from_id: from_id.to_string(),
        to_id: to_id.to_string(),
        added_files,
        modified_files,
        deleted_files,
        stats,
    })
}
/// Compute a line-level diff of a single file between two checkpoints.
///
/// Returns an empty diff (no hunks) when the content hashes match, and an
/// error when the file is missing from either manifest or exceeds
/// `options.max_file_size`.
pub fn diff_file(
    &self,
    from_id: &str,
    to_id: &str,
    file_path: &Path,
    options: crate::types::DiffOptions,
) -> Result<crate::types::FileDiff> {
    use crate::diff;
    debug!("Computing file diff for {:?} between {} and {}",
        file_path,
        &from_id[..8.min(from_id.len())],
        &to_id[..8.min(to_id.len())]);
    let from_manifest = self.storage.load_manifest(from_id)?;
    let to_manifest = self.storage.load_manifest(to_id)?;
    // The file must exist in both manifests; a linear search is used here.
    let from_entry = from_manifest.files.iter()
        .find(|e| e.path == file_path)
        .ok_or_else(|| TitorError::internal(
            format!("File {:?} not found in checkpoint {}", file_path, from_id)
        ))?;
    let to_entry = to_manifest.files.iter()
        .find(|e| e.path == file_path)
        .ok_or_else(|| TitorError::internal(
            format!("File {:?} not found in checkpoint {}", file_path, to_id)
        ))?;
    // Identical content: short-circuit with an empty, non-binary diff
    // without loading either object.
    if from_entry.content_hash == to_entry.content_hash {
        return Ok(crate::types::FileDiff {
            path: file_path.to_path_buf(),
            from_hash: from_entry.content_hash.clone(),
            to_hash: to_entry.content_hash.clone(),
            is_binary: false,
            hunks: vec![],
            lines_added: 0,
            lines_deleted: 0,
        });
    }
    // Refuse to diff oversized files rather than load them into memory.
    if from_entry.size > options.max_file_size || to_entry.size > options.max_file_size {
        return Err(TitorError::internal(
            format!("File {:?} exceeds maximum size for diff ({} bytes)",
                file_path, options.max_file_size)
        ));
    }
    let from_content = self.storage.load_object(&from_entry.content_hash)?;
    let to_content = self.storage.load_object(&to_entry.content_hash)?;
    diff::create_file_diff(
        file_path,
        &from_entry.content_hash,
        &to_entry.content_hash,
        &from_content,
        &to_content,
        &options,
    )
}
/// Compute the checkpoint diff plus line-level diffs for every modified
/// file small enough to load.
///
/// Per-file load or diff failures are logged and skipped rather than
/// failing the whole operation.
pub fn diff_detailed(
    &self,
    from_id: &str,
    to_id: &str,
    options: crate::types::DiffOptions,
) -> Result<crate::types::DetailedCheckpointDiff> {
    use crate::diff;
    debug!("Computing detailed diff between {} and {}",
        &from_id[..8.min(from_id.len())],
        &to_id[..8.min(to_id.len())]);
    let basic_diff = self.diff(from_id, to_id)?;
    let mut file_diffs = Vec::new();
    let mut total_lines_added = 0;
    let mut total_lines_deleted = 0;
    for (from_entry, to_entry) in &basic_diff.modified_files {
        // Skip oversized files instead of erroring, unlike diff_file().
        if from_entry.size > options.max_file_size || to_entry.size > options.max_file_size {
            debug!("Skipping large file {:?} for line diff", from_entry.path);
            continue;
        }
        match (
            self.storage.load_object(&from_entry.content_hash),
            self.storage.load_object(&to_entry.content_hash)
        ) {
            (Ok(from_content), Ok(to_content)) => {
                match diff::create_file_diff(
                    &from_entry.path,
                    &from_entry.content_hash,
                    &to_entry.content_hash,
                    &from_content,
                    &to_content,
                    &options,
                ) {
                    Ok(file_diff) => {
                        total_lines_added += file_diff.lines_added;
                        total_lines_deleted += file_diff.lines_deleted;
                        file_diffs.push(file_diff);
                    }
                    Err(e) => {
                        warn!("Failed to compute diff for {:?}: {}", from_entry.path, e);
                    }
                }
            }
            (Err(e), _) | (_, Err(e)) => {
                warn!("Failed to load content for {:?}: {}", from_entry.path, e);
            }
        }
    }
    Ok(crate::types::DetailedCheckpointDiff {
        basic_diff,
        file_diffs,
        total_lines_added,
        total_lines_deleted,
    })
}
/// Replace the automatic-checkpoint strategy.
pub fn set_auto_checkpoint(&mut self, strategy: AutoCheckpointStrategy) {
    let mut guard = self.auto_checkpoint_strategy.lock();
    *guard = strategy;
}
/// Delete all unreferenced objects from storage and report what was freed.
#[instrument(skip(self))]
pub fn gc(&self) -> Result<GcStats> {
    info!("Starting garbage collection");
    let start = Instant::now();
    let mut stats = GcStats::default();
    let unreferenced = self.storage.get_unreferenced_objects()?;
    stats.unreferenced_objects = unreferenced.clone();
    stats.objects_examined = self.storage.list_all_objects()?.len();
    for hash in &unreferenced {
        // Look up the size first so reclaimed bytes can be accounted for.
        match self.storage.get_object_size(hash) {
            Ok(size) => {
                if let Err(e) = self.storage.delete_object(hash) {
                    warn!("Failed to delete object {}: {}", &hash[..8], e);
                } else {
                    stats.objects_deleted += 1;
                    stats.bytes_reclaimed += size;
                }
            }
            Err(e) => {
                // Size unknown: still attempt deletion, but the reclaimed
                // byte count cannot include this object.
                warn!("Failed to get size for object {}: {}", &hash[..8], e);
                if self.storage.delete_object(hash).is_ok() {
                    stats.objects_deleted += 1;
                }
            }
        }
    }
    stats.duration_ms = start.elapsed().as_millis() as u64;
    info!(
        "Garbage collection complete in {}ms: {} objects deleted, {} bytes reclaimed",
        stats.duration_ms,
        stats.objects_deleted,
        stats.bytes_reclaimed
    );
    Ok(stats)
}
/// Dry-run garbage collection: report what `gc()` would delete without
/// removing anything. `objects_deleted` stays zero in the returned stats;
/// the would-be count is `unreferenced_objects.len()`.
#[instrument(skip(self))]
pub fn gc_analyze(&self) -> Result<GcStats> {
    info!("Analyzing garbage collection (dry run)");
    let start = Instant::now();
    let mut stats = GcStats::default();
    let unreferenced = self.storage.get_unreferenced_objects()?;
    stats.unreferenced_objects = unreferenced.clone();
    stats.objects_examined = self.storage.list_all_objects()?.len();
    for hash in &unreferenced {
        match self.storage.get_object_size(hash) {
            Ok(size) => stats.bytes_reclaimed += size,
            Err(e) => warn!("Failed to get size for object {}: {}", &hash[..8], e),
        }
    }
    stats.duration_ms = start.elapsed().as_millis() as u64;
    info!(
        "Garbage collection analysis complete in {}ms: {} objects would be deleted, {} bytes would be reclaimed",
        stats.duration_ms,
        unreferenced.len(),
        stats.bytes_reclaimed
    );
    Ok(stats)
}
/// Run a full integrity verification of a single checkpoint.
pub fn verify_checkpoint(&self, checkpoint_id: &str) -> Result<VerificationReport> {
    let checkpoint = self.storage.load_checkpoint(checkpoint_id)?;
    CheckpointVerifier::new(&self.storage).verify_complete(&checkpoint)
}
/// Verify the integrity of the whole in-memory timeline.
pub fn verify_timeline(&self) -> Result<TimelineVerificationReport> {
    let guard = self.timeline.read();
    CheckpointVerifier::new(&self.storage).verify_timeline(&guard)
}
/// Scan the working tree and compute its current Merkle root hash.
/// Returns the empty string when the tree has no root.
pub fn compute_current_merkle_root(&self) -> Result<String> {
    let entries = self.file_tracker.scan_directory::<fn(ProgressInfo)>(None)?;
    let root = MerkleTree::from_entries(&entries)?.root_hash();
    Ok(root.unwrap_or_default())
}
/// Register a lifecycle hook to be invoked around checkpoints and restores.
pub fn add_hook(&mut self, hook: Box<dyn CheckpointHook>) {
    let mut hooks = self.hooks.lock();
    hooks.push(hook);
}
fn load_timeline(storage: &Storage) -> Result<Timeline> {
let mut timeline = Timeline::new();
for checkpoint_id in storage.list_checkpoints()? {
let checkpoint = storage.load_checkpoint(&checkpoint_id)?;
timeline.add_checkpoint(checkpoint)?;
}
let timeline_path = storage.root().join("timeline.json");
if timeline_path.exists() {
let timeline_data = fs::read_to_string(&timeline_path)?;
if let Ok(timeline_state) = serde_json::from_str::<TimelineState>(&timeline_data) {
timeline.current_checkpoint_id = timeline_state.current_checkpoint_id;
debug!("Loaded current checkpoint: {:?}", timeline.current_checkpoint_id);
}
}
Ok(timeline)
}
/// Persist the current-checkpoint pointer to `timeline.json` atomically.
fn save_timeline(&self) -> Result<()> {
    let current = self.timeline.read().current_checkpoint_id.clone();
    let state = TimelineState {
        current_checkpoint_id: current,
        version: 1,
    };
    let json = serde_json::to_string_pretty(&state)?;
    let path = self.storage.root().join("timeline.json");
    utils::atomic_write(&path, json.as_bytes())?;
    debug!("Saved timeline state with current checkpoint: {:?}", state.current_checkpoint_id);
    Ok(())
}
}
/// Builder for configuring and constructing a [`Titor`] instance.
#[derive(Debug)]
pub struct TitorBuilder {
    // NOTE(review): this field is recorded but never read in build();
    // Titor::init always uses the fast strategy — confirm intent.
    compression_strategy: CompressionStrategy,
    // Glob-style patterns excluded from scanning (".titor" is always added).
    ignore_patterns: Vec<String>,
    // Maximum file size to track; 0 means unlimited.
    max_file_size: u64,
    // Number of parallel scan/store workers (minimum 1).
    parallel_workers: usize,
    // Whether the tracker follows symlinks instead of recording them.
    follow_symlinks: bool,
}
impl TitorBuilder {
pub fn new() -> Self {
Self {
compression_strategy: CompressionStrategy::default(),
ignore_patterns: Vec::new(),
max_file_size: 0,
parallel_workers: num_cpus::get(),
follow_symlinks: false,
}
}
pub fn compression_strategy(mut self, strategy: CompressionStrategy) -> Self {
self.compression_strategy = strategy;
self
}
pub fn ignore_patterns(mut self, patterns: Vec<String>) -> Self {
self.ignore_patterns = patterns;
self
}
pub fn max_file_size(mut self, size: u64) -> Self {
self.max_file_size = size;
self
}
pub fn parallel_workers(mut self, count: usize) -> Self {
self.parallel_workers = count.max(1);
self
}
pub fn follow_symlinks(mut self, follow: bool) -> Self {
self.follow_symlinks = follow;
self
}
/// Construct the [`Titor`] instance, opening existing storage when present
/// and initializing fresh storage otherwise.
pub fn build(self, root_path: PathBuf, storage_path: PathBuf) -> Result<Titor> {
    // Always exclude the storage directory itself (both file and dir forms).
    let mut effective_ignore_patterns = self.ignore_patterns.clone();
    effective_ignore_patterns.push(".titor".to_string());
    effective_ignore_patterns.push(".titor/".to_string());
    let storage_metadata_path = storage_path.join("metadata.json");
    if storage_metadata_path.exists() {
        // NOTE(review): when storage already exists, all builder settings
        // (patterns, sizes, workers, compression) are discarded in favor of
        // the persisted config — confirm this precedence is intended.
        Titor::open(root_path, storage_path)
    } else {
        // An empty pre-created directory is removed so initialization
        // starts from a clean slate; failure to remove is non-fatal.
        if storage_path.exists() && storage_path.read_dir()?.next().is_none() {
            std::fs::remove_dir(&storage_path).ok();
        }
        let mut titor = Titor::init(root_path.clone(), storage_path)?;
        // Overlay the builder's options on the defaults chosen by init().
        // NOTE(review): self.compression_strategy is never applied here;
        // init() hard-codes the fast strategy — confirm intent.
        titor.config.ignore_patterns = effective_ignore_patterns.clone();
        titor.config.max_file_size = self.max_file_size;
        titor.config.parallel_workers = self.parallel_workers;
        titor.config.follow_symlinks = self.follow_symlinks;
        titor.file_tracker = FileTracker::new(root_path)
            .with_ignore_patterns(effective_ignore_patterns.clone())
            .with_max_file_size(self.max_file_size)
            .with_follow_symlinks(self.follow_symlinks)
            .with_parallel_workers(self.parallel_workers);
        // Persist the effective configuration into storage metadata.
        titor.storage.update_metadata(|metadata| {
            metadata.config = titor.config.clone();
        })?;
        Ok(titor)
    }
}
}
impl Default for TitorBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
/// Build a fresh Titor over two temporary directories; the `TempDir`s are
/// returned so they outlive the test body.
fn create_test_titor() -> (Titor, TempDir, TempDir) {
    let root = TempDir::new().unwrap();
    let storage = TempDir::new().unwrap();
    let titor = TitorBuilder::new()
        .build(root.path().to_path_buf(), storage.path().to_path_buf())
        .unwrap();
    (titor, root, storage)
}
#[test]
fn test_titor_init() {
    let root_dir = TempDir::new().unwrap();
    let storage_dir = TempDir::new().unwrap();
    // init() expects to create the storage layout itself, so clear the
    // directory tempfile pre-created (ignore failure).
    std::fs::remove_dir_all(storage_dir.path()).ok();
    let _titor = Titor::init(
        root_dir.path().to_path_buf(),
        storage_dir.path().to_path_buf(),
    )
    .unwrap();
    // The standard storage layout must exist after init.
    for expected in ["metadata.json", "checkpoints", "objects"] {
        assert!(storage_dir.path().join(expected).exists());
    }
}
#[test]
fn test_checkpoint_creation() {
    let (mut titor, root_dir, _storage_dir) = create_test_titor();
    for (name, body) in [("file1.txt", "content1"), ("file2.txt", "content2")] {
        fs::write(root_dir.path().join(name), body).unwrap();
    }
    let checkpoint = titor.checkpoint(Some("Initial state".to_string())).unwrap();
    // First checkpoint: no parent, both files counted.
    assert!(checkpoint.parent_id.is_none());
    assert_eq!(checkpoint.metadata.file_count, 2);
    assert!(checkpoint.metadata.total_size > 0);
}
#[test]
fn test_checkpoint_restore() {
    let (mut titor, root_dir, _storage_dir) = create_test_titor();
    let file1 = root_dir.path().join("file1.txt");
    let file2 = root_dir.path().join("file2.txt");
    // Snapshot version 1, then change the tree and snapshot version 2.
    fs::write(&file1, "version1").unwrap();
    let checkpoint1 = titor.checkpoint(Some("Version 1".to_string())).unwrap();
    fs::write(&file1, "version2").unwrap();
    fs::write(&file2, "new file").unwrap();
    let _checkpoint2 = titor.checkpoint(Some("Version 2".to_string())).unwrap();
    // Rewinding must rewrite file1 and delete the file2 that didn't exist yet.
    let result = titor.restore(&checkpoint1.id).unwrap();
    assert_eq!(result.files_restored, 1);
    assert_eq!(result.files_deleted, 1);
    assert_eq!(fs::read_to_string(&file1).unwrap(), "version1");
    assert!(!file2.exists());
}
#[test]
fn test_diff() {
    let (mut titor, root_dir, _storage_dir) = create_test_titor();
    fs::write(root_dir.path().join("file1.txt"), "content1").unwrap();
    let checkpoint1 = titor.checkpoint(None).unwrap();
    // One modification and one addition between the two checkpoints.
    fs::write(root_dir.path().join("file1.txt"), "modified").unwrap();
    fs::write(root_dir.path().join("file2.txt"), "new").unwrap();
    let checkpoint2 = titor.checkpoint(None).unwrap();
    let diff = titor.diff(&checkpoint1.id, &checkpoint2.id).unwrap();
    assert_eq!(
        (diff.added_files.len(), diff.modified_files.len(), diff.deleted_files.len()),
        (1, 1, 0)
    );
}
#[test]
fn test_storage_dir_ignored() {
    use std::fs;
    // Place the storage directory inside the tracked root: it must be
    // excluded from checkpoints automatically.
    let root_dir = TempDir::new().unwrap();
    let storage_path = root_dir.path().join(".titor");
    fs::create_dir_all(&storage_path).unwrap();
    let mut titor = TitorBuilder::new()
        .build(root_dir.path().to_path_buf(), storage_path.clone())
        .unwrap();
    fs::write(root_dir.path().join("data.txt"), "hello").unwrap();
    let checkpoint = titor.checkpoint(None).unwrap();
    assert_eq!(checkpoint.metadata.file_count, 1);
    let manifest = titor.storage.load_manifest(&checkpoint.id).unwrap();
    let leaked = manifest.files.iter().any(|e| e.path.starts_with(".titor"));
    assert!(!leaked);
}
#[test]
fn test_current_checkpoint_updates() {
    use std::fs;
    let (mut titor, root_dir, _storage_dir) = create_test_titor();
    // Each checkpoint must advance the timeline's current pointer.
    fs::write(root_dir.path().join("file1.txt"), "one").unwrap();
    let cp1 = titor.checkpoint(None).unwrap();
    let current = titor.get_timeline().unwrap().current_checkpoint_id;
    assert_eq!(current, Some(cp1.id.clone()));
    fs::write(root_dir.path().join("file2.txt"), "two").unwrap();
    let cp2 = titor.checkpoint(None).unwrap();
    let current = titor.get_timeline().unwrap().current_checkpoint_id;
    assert_eq!(current, Some(cp2.id.clone()));
}
/// Round-trip checkpoint/restore of filenames containing spaces, shell
/// metacharacters, quotes, emoji, and non-Latin scripts.
#[test]
fn test_special_character_filenames() {
    use std::fs;
    let (mut titor, root_dir, _storage_dir) = create_test_titor();
    // (filename, content) pairs exercising awkward but valid path characters.
    let special_files = vec![
        ("file with spaces.txt", "content1"),
        ("file-with-dashes.txt", "content2"),
        ("file_with_underscores.txt", "content3"),
        ("file$with$dollar.txt", "content4"),
        ("file@with@at.txt", "content5"),
        ("file#with#hash.txt", "content6"),
        ("file(with)parens.txt", "content7"),
        ("file[with]brackets.txt", "content8"),
        ("file{with}braces.txt", "content9"),
        ("file'with'quotes.txt", "content10"),
        ("file\"with\"doublequotes.txt", "content11"),
        ("file🔥with🔥emoji.txt", "content12"),
        ("文件名.txt", "content13"),
        ("файл.txt", "content14"),
    ];
    for (filename, content) in &special_files {
        fs::write(root_dir.path().join(filename), content).unwrap();
    }
    let checkpoint = titor.checkpoint(Some("Special characters test".to_string())).unwrap();
    assert_eq!(checkpoint.metadata.file_count, special_files.len());
    // Delete everything, then restore and verify names and contents survive.
    for (filename, _) in &special_files {
        fs::remove_file(root_dir.path().join(filename)).unwrap();
    }
    let result = titor.restore(&checkpoint.id).unwrap();
    assert_eq!(result.files_restored, special_files.len());
    assert!(result.warnings.is_empty(), "Warnings during restore: {:?}", result.warnings);
    for (filename, expected_content) in &special_files {
        let path = root_dir.path().join(filename);
        assert!(path.exists(), "File {} was not restored", filename);
        let content = fs::read_to_string(&path).unwrap();
        assert_eq!(content, *expected_content, "Content mismatch for {}", filename);
    }
}
/// Verify a relative symlink survives checkpoint + restore as a symlink
/// (not a copied regular file) and still points at the same target.
#[test]
fn test_symlink_restoration() {
    use std::fs;
    let (mut titor, root_dir, _storage_dir) = create_test_titor();
    let target_path = root_dir.path().join("target.txt");
    let symlink_path = root_dir.path().join("symlink.txt");
    fs::write(&target_path, "target content").unwrap();
    // Relative target so the link stays valid inside the temp root.
    utils::create_symlink(&PathBuf::from("target.txt"), &symlink_path).unwrap();
    assert!(symlink_path.exists(), "Symlink was not created");
    assert!(symlink_path.symlink_metadata().unwrap().file_type().is_symlink(), "Created file is not a symlink");
    let checkpoint = titor.checkpoint(Some("Symlink test".to_string())).unwrap();
    println!("Created checkpoint with {} files", checkpoint.metadata.file_count);
    // Debug aid: show how the manifest recorded the link.
    let manifest = titor.storage.load_manifest(&checkpoint.id).unwrap();
    for entry in &manifest.files {
        println!("Manifest entry: path={:?}, is_symlink={}, symlink_target={:?}",
            entry.path, entry.is_symlink, entry.symlink_target);
    }
    // Remove both link and target, then restore everything from the checkpoint.
    fs::remove_file(&symlink_path).unwrap();
    fs::remove_file(&target_path).unwrap();
    let result = titor.restore(&checkpoint.id).unwrap();
    println!("Restore result: {} files restored, warnings: {:?}", result.files_restored, result.warnings);
    assert!(result.warnings.is_empty(), "Warnings during restore: {:?}", result.warnings);
    // Debug aid: list the restored directory with per-entry symlink status.
    for entry in fs::read_dir(root_dir.path()).unwrap() {
        let entry = entry.unwrap();
        let metadata = entry.metadata();
        // metadata() follows links; fall back to symlink_metadata() on error
        // (e.g. a dangling link).
        let is_symlink = if let Ok(m) = &metadata {
            m.file_type().is_symlink()
        } else {
            entry.path().symlink_metadata()
                .map(|m| m.file_type().is_symlink())
                .unwrap_or(false)
        };
        println!("After restore: {:?} (symlink: {})",
            entry.file_name(),
            is_symlink);
    }
    // exists() follows the link; symlink_metadata() works even if dangling.
    assert!(symlink_path.exists() || symlink_path.symlink_metadata().is_ok(),
        "Symlink was not restored");
    assert!(symlink_path.symlink_metadata().unwrap().file_type().is_symlink(),
        "Restored file is not a symlink");
    let restored_target = utils::read_symlink(&symlink_path).unwrap();
    assert_eq!(restored_target, PathBuf::from("target.txt"));
}
/// Verify the checkpoint records a compressed size smaller than the raw
/// total when the tree contains highly compressible content.
#[test]
fn test_compression_size_tracking() {
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    // Highly repetitive text compresses well and dominates the totals.
    let repetitive_content = "This is a test. ".repeat(10000);
    fs::write(temp_dir.path().join("repetitive.txt"), &repetitive_content).unwrap();
    // Cycling byte pattern: still compressible, exercises the binary path.
    let binary_content: Vec<u8> = (0..10000).map(|i| (i % 256) as u8).collect();
    fs::write(temp_dir.path().join("binary.dat"), &binary_content).unwrap();
    let small_content = "Small file";
    fs::write(temp_dir.path().join("small.txt"), &small_content).unwrap();
    let checkpoint = titor.checkpoint(Some("Compression test".to_string())).unwrap();
    assert!(checkpoint.metadata.compressed_size < checkpoint.metadata.total_size,
        "Compressed size ({}) should be less than total size ({})",
        checkpoint.metadata.compressed_size,
        checkpoint.metadata.total_size);
    // Require at least a 10% reduction overall.
    let compression_ratio = 1.0 - (checkpoint.metadata.compressed_size as f64 / checkpoint.metadata.total_size as f64);
    assert!(compression_ratio > 0.1, "Compression ratio {:.2}% is too low", compression_ratio * 100.0);
    println!("Compression test results:");
    println!("  Total size: {} bytes", checkpoint.metadata.total_size);
    println!("  Compressed size: {} bytes", checkpoint.metadata.compressed_size);
    println!("  Compression ratio: {:.2}%", compression_ratio * 100.0);
}
#[test]
fn test_empty_directory_preservation() {
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    let root = temp_dir.path();
    // One empty dir, one nested empty dir, and one dir with content.
    fs::create_dir(root.join("empty_dir")).unwrap();
    fs::create_dir_all(root.join("nested/empty")).unwrap();
    fs::create_dir(root.join("dir_with_file")).unwrap();
    fs::write(root.join("dir_with_file/file.txt"), "content").unwrap();
    let checkpoint = titor.checkpoint(Some("Empty dirs test".to_string())).unwrap();
    // Wipe all three trees, then restore from the checkpoint.
    for dir in ["empty_dir", "nested", "dir_with_file"] {
        fs::remove_dir_all(root.join(dir)).unwrap();
    }
    let result = titor.restore(&checkpoint.id).unwrap();
    assert!(result.warnings.is_empty(), "Warnings during restore: {:?}", result.warnings);
    assert!(root.join("empty_dir").exists(), "Empty directory was not restored");
    assert!(root.join("nested/empty").exists(), "Nested empty directory was not restored");
    assert!(root.join("dir_with_file").exists(), "Directory with file was not restored");
    assert!(root.join("dir_with_file/file.txt").exists(), "File in directory was not restored");
}
#[test]
fn test_line_diff_simple() {
    use crate::types::DiffOptions;
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    let file_path = temp_dir.path().join("test.txt");
    // Baseline: three lines. Update: one line changed, one appended.
    fs::write(&file_path, "line1\nline2\nline3\n").unwrap();
    let checkpoint1 = titor.checkpoint(Some("Initial".to_string())).unwrap();
    fs::write(&file_path, "line1\nline2 modified\nline3\nline4\n").unwrap();
    let checkpoint2 = titor.checkpoint(Some("Modified".to_string())).unwrap();
    let file_diff = titor
        .diff_file(
            &checkpoint1.id,
            &checkpoint2.id,
            Path::new("test.txt"),
            DiffOptions::default(),
        )
        .unwrap();
    assert!(!file_diff.is_binary);
    // The modified line counts as one deletion plus one addition.
    assert_eq!(file_diff.lines_added, 2);
    assert_eq!(file_diff.lines_deleted, 1);
    assert!(file_diff.hunks.len() > 0);
}
#[test]
fn test_line_diff_binary_file() {
    use crate::types::DiffOptions;
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    let file_path = temp_dir.path().join("binary.dat");

    // First revision: bytes that are clearly not valid UTF-8 text.
    fs::write(&file_path, [0u8, 1, 2, 3, 255, 254, 253, 0]).unwrap();
    let first = titor.checkpoint(Some("Binary v1".to_string())).unwrap();

    // Second revision with a different binary payload.
    fs::write(&file_path, [0u8, 1, 2, 3, 4, 5, 6, 7, 0]).unwrap();
    let second = titor.checkpoint(Some("Binary v2".to_string())).unwrap();

    let file_diff = titor
        .diff_file(&first.id, &second.id, Path::new("binary.dat"), DiffOptions::default())
        .unwrap();

    // Binary files are flagged and receive no line-level diff information.
    assert!(file_diff.is_binary);
    assert_eq!(file_diff.hunks.len(), 0);
    assert_eq!(file_diff.lines_added, 0);
    assert_eq!(file_diff.lines_deleted, 0);
}
#[test]
fn test_line_diff_new_file() {
    use crate::types::DiffOptions;
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    let root = temp_dir.path();

    // The first checkpoint does not contain `new.txt` at all.
    fs::write(root.join("other.txt"), "other content").unwrap();
    let before = titor.checkpoint(Some("Before".to_string())).unwrap();

    fs::write(root.join("new.txt"), "new line 1\nnew line 2\n").unwrap();
    let after = titor.checkpoint(Some("After".to_string())).unwrap();

    // Diffing a file that is absent from the first checkpoint must fail.
    let outcome = titor.diff_file(
        &before.id,
        &after.id,
        Path::new("new.txt"),
        DiffOptions::default(),
    );
    assert!(outcome.is_err());
}
#[test]
fn test_line_diff_context_lines() {
    use crate::types::{DiffOptions, LineChange};
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    let path = temp_dir.path().join("context.txt");

    // Ten numbered lines; only line 5 changes between the two checkpoints.
    let original = (1..=10).map(|i| format!("line{}", i)).collect::<Vec<_>>().join("\n");
    fs::write(&path, &original).unwrap();
    let first = titor.checkpoint(Some("Original".to_string())).unwrap();

    fs::write(&path, original.replace("line5", "line5 modified")).unwrap();
    let second = titor.checkpoint(Some("Modified".to_string())).unwrap();

    // Ask for two lines of context around each change.
    let options = DiffOptions { context_lines: 2, ..Default::default() };
    let file_diff = titor
        .diff_file(&first.id, &second.id, Path::new("context.txt"), options)
        .unwrap();

    assert_eq!(file_diff.hunks.len(), 1);
    // Expect at least two context lines on each side of the single change.
    let context_count = file_diff.hunks[0]
        .changes
        .iter()
        .filter(|c| matches!(c, LineChange::Context(_, _)))
        .count();
    assert!(context_count >= 4);
}
#[test]
fn test_detailed_diff() {
    use crate::types::DiffOptions;
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    let root = temp_dir.path();

    // Baseline checkpoint with three files.
    fs::write(root.join("file1.txt"), "content1").unwrap();
    fs::write(root.join("file2.txt"), "content2").unwrap();
    fs::write(root.join("file3.txt"), "content3").unwrap();
    let base = titor.checkpoint(Some("Initial".to_string())).unwrap();

    // One file appended-to, one rewritten, one deleted, one brand new.
    fs::write(root.join("file1.txt"), "content1\nmodified").unwrap();
    fs::write(root.join("file2.txt"), "completely different").unwrap();
    fs::remove_file(root.join("file3.txt")).unwrap();
    fs::write(root.join("file4.txt"), "new file").unwrap();
    let changed = titor.checkpoint(Some("Changes".to_string())).unwrap();

    let detailed = titor
        .diff_detailed(&base.id, &changed.id, DiffOptions::default())
        .unwrap();

    assert_eq!(detailed.basic_diff.added_files.len(), 1);
    assert_eq!(detailed.basic_diff.modified_files.len(), 2);
    assert_eq!(detailed.basic_diff.deleted_files.len(), 1);
    // Line-level diffs are computed for the two modified files only.
    assert_eq!(detailed.file_diffs.len(), 2);
    assert!(detailed.total_lines_added > 0);
    assert!(detailed.total_lines_deleted > 0);
}
#[test]
fn test_line_diff_whitespace_ignore() {
    use crate::types::DiffOptions;
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    let path = temp_dir.path().join("whitespace.txt");

    // The two revisions differ only in trailing whitespace on two lines.
    fs::write(&path, "line1\nline2 \nline3").unwrap();
    let first = titor.checkpoint(Some("Original".to_string())).unwrap();
    fs::write(&path, "line1\nline2\nline3 ").unwrap();
    let second = titor.checkpoint(Some("Whitespace changed".to_string())).unwrap();

    // Diff once with whitespace significant, once with it ignored.
    let diff_with_ws = titor
        .diff_file(
            &first.id,
            &second.id,
            Path::new("whitespace.txt"),
            DiffOptions { ignore_whitespace: false, ..Default::default() },
        )
        .unwrap();
    let diff_without_ws = titor
        .diff_file(
            &first.id,
            &second.id,
            Path::new("whitespace.txt"),
            DiffOptions { ignore_whitespace: true, ..Default::default() },
        )
        .unwrap();

    // The whitespace-sensitive diff must see changes; the insensitive one
    // can only report the same or fewer line changes.
    assert!(diff_with_ws.lines_added > 0 || diff_with_ws.lines_deleted > 0);
    assert!(diff_without_ws.lines_added <= diff_with_ws.lines_added);
    assert!(diff_without_ws.lines_deleted <= diff_with_ws.lines_deleted);
}
#[test]
fn test_line_diff_large_file() {
    use crate::types::DiffOptions;
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    let path = temp_dir.path().join("large.txt");

    // 10k identical lines, then a revision with only the first line rewritten.
    let large_content = "Large line\n".repeat(10000);
    fs::write(&path, &large_content).unwrap();
    let first = titor.checkpoint(Some("Large file".to_string())).unwrap();

    // Skip the first 11 bytes ("Large line\n") and substitute a new first line.
    let modified = format!("First line modified\n{}", &large_content[11..]);
    fs::write(&path, &modified).unwrap();
    let second = titor.checkpoint(Some("Large file modified".to_string())).unwrap();

    // A tiny size cap makes the diff refuse the file...
    let capped = titor.diff_file(
        &first.id,
        &second.id,
        Path::new("large.txt"),
        DiffOptions { max_file_size: 100, ..Default::default() },
    );
    assert!(capped.is_err());

    // ...while a generous cap lets the single-line change through.
    let file_diff = titor
        .diff_file(
            &first.id,
            &second.id,
            Path::new("large.txt"),
            DiffOptions { max_file_size: 1024 * 1024 * 100, ..Default::default() },
        )
        .unwrap();
    assert_eq!(file_diff.lines_added, 1);
    assert_eq!(file_diff.lines_deleted, 1);
}
#[test]
fn test_line_diff_unicode() {
    use crate::types::DiffOptions;
    let (mut titor, temp_dir, _storage_dir) = create_test_titor();
    let path = temp_dir.path().join("unicode.txt");

    // Mixed-script content: CJK, emoji, Greek — one line edited, one appended.
    fs::write(&path, "Hello 世界\n🦀 Rust\nΓειά σου κόσμε\n").unwrap();
    let first = titor.checkpoint(Some("Unicode v1".to_string())).unwrap();
    fs::write(&path, "Hello 世界\n🦀 Rust 🔥\nΓειά σου κόσμε\n新しい行\n").unwrap();
    let second = titor.checkpoint(Some("Unicode v2".to_string())).unwrap();

    let file_diff = titor
        .diff_file(&first.id, &second.id, Path::new("unicode.txt"), DiffOptions::default())
        .unwrap();

    // Valid UTF-8 must not be mistaken for binary.
    assert!(!file_diff.is_binary);
    // Edited line counts as delete + add; the appended line adds one more.
    assert_eq!(file_diff.lines_added, 2);
    assert_eq!(file_diff.lines_deleted, 1);
}
#[test]
fn test_gitignore_creation() {
    let root_dir = TempDir::new().unwrap();
    let storage_dir = TempDir::new().unwrap();
    // Delete the storage dir so init() starts from a non-existent location.
    std::fs::remove_dir_all(storage_dir.path()).ok();

    let gitignore_path = root_dir.path().join(".gitignore");
    assert!(!gitignore_path.exists(), ".gitignore should not exist initially");

    let _titor = Titor::init(
        root_dir.path().to_path_buf(),
        storage_dir.path().to_path_buf(),
    )
    .unwrap();

    // init() is expected to write a .gitignore that excludes the .titor dir.
    assert!(gitignore_path.exists(), ".gitignore should be created");
    let content = fs::read_to_string(&gitignore_path).unwrap();
    assert!(content.contains(".titor"), ".gitignore should contain .titor");
}
#[test]
fn test_gitignore_existing_file() {
    let root_dir = TempDir::new().unwrap();
    let storage_dir = TempDir::new().unwrap();
    // Delete the storage dir so init() starts from a non-existent location.
    std::fs::remove_dir_all(storage_dir.path()).ok();

    // Pre-populate a .gitignore before initializing Titor.
    let gitignore_path = root_dir.path().join(".gitignore");
    fs::write(&gitignore_path, "*.log\nnode_modules/\n").unwrap();

    let _titor = Titor::init(
        root_dir.path().to_path_buf(),
        storage_dir.path().to_path_buf(),
    )
    .unwrap();

    // init() appends .titor without clobbering the pre-existing entries.
    let content = fs::read_to_string(&gitignore_path).unwrap();
    assert!(content.contains("*.log"), "Existing content should be preserved");
    assert!(content.contains("node_modules/"), "Existing content should be preserved");
    assert!(content.contains(".titor"), ".gitignore should contain .titor");
    // Each entry must also survive as its own complete line.
    let lines: Vec<&str> = content.lines().collect();
    assert!(lines.contains(&"*.log"));
    assert!(lines.contains(&"node_modules/"));
    assert!(lines.contains(&".titor"));
}
#[test]
fn test_gitignore_already_contains_titor() {
    let root_dir = TempDir::new().unwrap();
    let storage_dir = TempDir::new().unwrap();
    // Delete the storage dir so init() starts from a non-existent location.
    std::fs::remove_dir_all(storage_dir.path()).ok();

    // Write a .gitignore that already lists .titor.
    let gitignore_path = root_dir.path().join(".gitignore");
    fs::write(&gitignore_path, "*.log\n.titor\nnode_modules/\n").unwrap();
    let original_content = fs::read_to_string(&gitignore_path).unwrap();

    let _titor = Titor::init(
        root_dir.path().to_path_buf(),
        storage_dir.path().to_path_buf(),
    )
    .unwrap();

    // With .titor already present, init() must leave the file untouched.
    let new_content = fs::read_to_string(&gitignore_path).unwrap();
    assert_eq!(original_content, new_content, ".gitignore should not be modified if .titor already exists");
}
#[test]
fn test_gitignore_on_open() {
    let root_dir = TempDir::new().unwrap();
    let storage_dir = TempDir::new().unwrap();
    // Delete the storage dir so init() starts from a non-existent location.
    std::fs::remove_dir_all(storage_dir.path()).ok();

    let _titor = Titor::init(
        root_dir.path().to_path_buf(),
        storage_dir.path().to_path_buf(),
    )
    .unwrap();

    // Remove the .gitignore that init() produced...
    let gitignore_path = root_dir.path().join(".gitignore");
    fs::remove_file(&gitignore_path).unwrap();
    assert!(!gitignore_path.exists());

    // ...and verify open() regenerates it.
    let _titor2 = Titor::open(
        root_dir.path().to_path_buf(),
        storage_dir.path().to_path_buf(),
    )
    .unwrap();

    assert!(gitignore_path.exists(), ".gitignore should be created on open");
    let content = fs::read_to_string(&gitignore_path).unwrap();
    assert!(content.contains(".titor"), ".gitignore should contain .titor");
}
}