1use anyhow::{Context, Result, bail};
7use frankensqlite::Row;
8use frankensqlite::compat::{ConnectionExt, ParamValue, RowExt};
9use serde::{Deserialize, Serialize};
10use std::path::Path;
11
/// One mebibyte; keeps the MiB-based limits below readable.
const MIB: u64 = 1024 * 1024;

/// Hard ceiling for the whole generated site (1 GiB). Diagnostics in this file
/// refer to it as the GitHub Pages limit.
pub const MAX_SITE_SIZE_BYTES: u64 = 1024 * MIB;

/// Total size above which an "approaching limit" warning is produced (900 MiB).
pub const SITE_SIZE_WARNING_BYTES: u64 = 900 * MIB;

/// Largest single file accepted by bundle verification (100 MiB).
pub const MAX_FILE_SIZE_BYTES: u64 = 100 * MIB;

/// Single-file size above which a "large file" warning is produced (50 MiB).
pub const FILE_SIZE_WARNING_BYTES: u64 = 50 * MIB;

/// Chunk size assumed when estimating how many payload chunks are needed (8 MiB).
pub const DEFAULT_CHUNK_SIZE: u64 = 8 * MIB;

/// Bytes of AEAD overhead added to the estimate for every chunk.
pub const AEAD_TAG_OVERHEAD: u64 = 16;

/// Fixed allowance added to every estimate for static site assets (2 MiB).
pub const STATIC_ASSETS_SIZE: u64 = 2 * MIB;

/// Assumed compressed-size / plaintext-size ratio used by the estimator.
pub const COMPRESSION_RATIO: f64 = 0.45;
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct SizeEstimate {
39 pub plaintext_bytes: u64,
41 pub compressed_bytes: u64,
43 pub encrypted_bytes: u64,
45 pub static_assets_bytes: u64,
47 pub total_site_bytes: u64,
49 pub chunk_count: u32,
51 pub conversation_count: u64,
53 pub message_count: u64,
55}
56
57impl SizeEstimate {
58 pub fn from_database<P: AsRef<Path>>(
60 db_path: P,
61 agents: Option<&[String]>,
62 since_ts: Option<i64>,
63 until_ts: Option<i64>,
64 ) -> Result<Self> {
65 let conn = super::open_existing_sqlite_db(db_path.as_ref())
66 .context("Failed to open database for size estimation")?;
67
68 conn.execute("PRAGMA busy_timeout = 5000;")?;
69
70 let mut conditions = Vec::new();
72 let mut param_values: Vec<ParamValue> = Vec::new();
73
74 if let Some(agents) = agents {
75 if agents.is_empty() {
76 conditions.push("1=0".to_string());
77 } else {
78 let placeholders: Vec<_> = agents.iter().map(|_| "?").collect();
79 conditions.push(format!(
85 "c.agent_id IN (SELECT a.id FROM agents a WHERE a.slug IN ({}))",
86 placeholders.join(", ")
87 ));
88 for agent in agents {
89 param_values.push(ParamValue::from(agent.as_str()));
90 }
91 }
92 }
93
94 if let Some(since) = since_ts {
95 conditions.push("c.started_at >= ?".to_string());
96 param_values.push(ParamValue::from(since));
97 }
98
99 if let Some(until) = until_ts {
100 conditions.push("c.started_at <= ?".to_string());
101 param_values.push(ParamValue::from(until));
102 }
103
104 let where_clause = if conditions.is_empty() {
105 String::new()
106 } else {
107 format!(" WHERE {}", conditions.join(" AND "))
108 };
109
110 let params_slice = ¶m_values;
111
112 let conv_sql = format!("SELECT COUNT(*) FROM conversations c{}", where_clause);
119 let conversation_count: u64 = conn
120 .query_row_map(&conv_sql, params_slice, |row: &Row| {
121 row.get_typed::<Option<i64>>(0)
122 .map(|opt| opt.unwrap_or(0).max(0) as u64)
123 })
124 .with_context(|| {
125 format!("Failed to count conversations for size estimate: {conv_sql}")
126 })?;
127
128 let msg_sql = format!(
130 "SELECT COUNT(*), SUM(LENGTH(m.content))
131 FROM messages m
132 JOIN conversations c ON m.conversation_id = c.id
133 {}",
134 where_clause
135 );
136 let (message_count, plaintext_bytes): (u64, u64) = conn
137 .query_row_map(&msg_sql, params_slice, |row: &Row| {
138 let raw_message_count = row.get_typed::<i64>(0).unwrap_or(0);
139 let raw_plaintext_bytes = row.get_typed::<Option<i64>>(1)?.unwrap_or(0);
140 Ok((
141 raw_message_count.max(0) as u64,
142 raw_plaintext_bytes.max(0) as u64,
143 ))
144 })
145 .with_context(|| format!("Failed to estimate message payload size: {msg_sql}"))?;
146
147 Self::from_plaintext_size(plaintext_bytes, conversation_count, message_count)
148 }
149
150 pub fn from_plaintext_size(
152 plaintext_bytes: u64,
153 conversation_count: u64,
154 message_count: u64,
155 ) -> Result<Self> {
156 let compressed_bytes = (plaintext_bytes as f64 * COMPRESSION_RATIO) as u64;
158
159 let chunk_count_u64 = compressed_bytes.div_ceil(DEFAULT_CHUNK_SIZE).max(1);
161 let chunk_count = u32::try_from(chunk_count_u64).unwrap_or(u32::MAX);
162
163 let aead_overhead = u64::from(chunk_count)
165 .checked_mul(AEAD_TAG_OVERHEAD)
166 .ok_or_else(|| anyhow::anyhow!("AEAD overhead overflow"))?;
167 let encrypted_bytes = compressed_bytes
168 .checked_add(aead_overhead)
169 .ok_or_else(|| anyhow::anyhow!("Encrypted size overflow"))?;
170
171 let total_site_bytes = encrypted_bytes
173 .checked_add(STATIC_ASSETS_SIZE)
174 .ok_or_else(|| anyhow::anyhow!("Total site size overflow"))?;
175
176 Ok(Self {
177 plaintext_bytes,
178 compressed_bytes,
179 encrypted_bytes,
180 static_assets_bytes: STATIC_ASSETS_SIZE,
181 total_site_bytes,
182 chunk_count,
183 conversation_count,
184 message_count,
185 })
186 }
187
188 pub fn check_limits(&self) -> SizeLimitResult {
190 if self.total_site_bytes > MAX_SITE_SIZE_BYTES {
191 return SizeLimitResult::ExceedsLimit(SizeError::TotalExceedsLimit {
192 actual: self.total_site_bytes,
193 limit: MAX_SITE_SIZE_BYTES,
194 });
195 }
196
197 if self.total_site_bytes > SITE_SIZE_WARNING_BYTES {
198 return SizeLimitResult::Warning(SizeWarning::ApproachingLimit {
199 actual: self.total_site_bytes,
200 limit: MAX_SITE_SIZE_BYTES,
201 percentage: (self.total_site_bytes as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0)
202 as u8,
203 });
204 }
205
206 SizeLimitResult::Ok
207 }
208
209 pub fn format_display(&self) -> String {
211 format!(
212 "Estimated bundle size: {}\n\
213 • Payload: {} ({} chunks × {} max)\n\
214 • Static assets: {}\n\
215 • Compression ratio: ~{:.0}%\n\
216 • Conversations: {}\n\
217 • Messages: {}",
218 format_bytes(self.total_site_bytes),
219 format_bytes(self.encrypted_bytes),
220 self.chunk_count,
221 format_bytes(DEFAULT_CHUNK_SIZE),
222 format_bytes(self.static_assets_bytes),
223 COMPRESSION_RATIO * 100.0,
224 self.conversation_count,
225 self.message_count,
226 )
227 }
228}
229
230#[derive(Debug, Clone)]
232pub enum SizeLimitResult {
233 Ok,
235 Warning(SizeWarning),
237 ExceedsLimit(SizeError),
239}
240
241impl SizeLimitResult {
242 pub fn is_ok(&self) -> bool {
244 matches!(self, SizeLimitResult::Ok)
245 }
246
247 pub fn is_warning(&self) -> bool {
249 matches!(self, SizeLimitResult::Warning(_))
250 }
251
252 pub fn is_error(&self) -> bool {
254 matches!(self, SizeLimitResult::ExceedsLimit(_))
255 }
256}
257
/// Hard size-limit violations.
///
/// The `Display` text is user-facing; byte counts are rendered through
/// `format_bytes`.
#[derive(Debug, Clone, thiserror::Error)]
pub enum SizeError {
    /// The whole site is larger than `limit`. The message also lists
    /// suggestions for narrowing the export.
    #[error(
        "Total size ({}) exceeds GitHub Pages limit ({})\n\n\
         Suggestions:\n\
         • Use --since \"90 days ago\" for recent conversations only\n\
         • Use --agents <name> to limit to specific agents\n\
         • Use --workspaces <path> to limit projects",
        format_bytes(*actual),
        format_bytes(*limit)
    )]
    TotalExceedsLimit { actual: u64, limit: u64 },
    /// A single file at `path` is larger than `limit`.
    #[error("File {path} ({}) exceeds limit ({})", format_bytes(*actual), format_bytes(*limit))]
    FileExceedsLimit {
        path: String,
        actual: u64,
        limit: u64,
    },
}
280
281#[derive(Debug, Clone)]
283pub enum SizeWarning {
284 ApproachingLimit {
286 actual: u64,
287 limit: u64,
288 percentage: u8,
289 },
290 LargeFile { path: String, size: u64 },
292}
293
294impl std::fmt::Display for SizeWarning {
295 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
296 match self {
297 SizeWarning::ApproachingLimit {
298 actual,
299 limit,
300 percentage,
301 } => {
302 write!(
303 f,
304 "Estimated size {} is {}% of GitHub Pages limit ({})",
305 format_bytes(*actual),
306 percentage,
307 format_bytes(*limit)
308 )
309 }
310 SizeWarning::LargeFile { path, size } => {
311 write!(f, "Large file: {} ({})", path, format_bytes(*size))
312 }
313 }
314 }
315}
316
317pub struct BundleVerifier;
319
320impl BundleVerifier {
321 pub fn verify<P: AsRef<Path>>(site_dir: P) -> Result<Vec<SizeWarning>> {
323 let site_dir = site_dir.as_ref();
324 let mut warnings = Vec::new();
325 let mut total_size = 0u64;
326
327 visit_files(site_dir, &mut |path, size| {
328 total_size += size;
329
330 if size > MAX_FILE_SIZE_BYTES {
331 bail!(
332 "File {} ({}) exceeds maximum file size ({}). Chunking may have failed.",
333 path.display(),
334 format_bytes(size),
335 format_bytes(MAX_FILE_SIZE_BYTES)
336 );
337 }
338
339 if size > FILE_SIZE_WARNING_BYTES {
340 let rel_path = path
341 .strip_prefix(site_dir)
342 .unwrap_or(path)
343 .to_string_lossy()
344 .to_string();
345 warnings.push(SizeWarning::LargeFile {
346 path: rel_path,
347 size,
348 });
349 }
350
351 Ok(())
352 })?;
353
354 if total_size > MAX_SITE_SIZE_BYTES {
355 bail!(
356 "Total bundle size ({}) exceeds GitHub Pages limit ({})",
357 format_bytes(total_size),
358 format_bytes(MAX_SITE_SIZE_BYTES)
359 );
360 }
361
362 if total_size > SITE_SIZE_WARNING_BYTES {
363 warnings.push(SizeWarning::ApproachingLimit {
364 actual: total_size,
365 limit: MAX_SITE_SIZE_BYTES,
366 percentage: (total_size as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0) as u8,
367 });
368 }
369
370 Ok(warnings)
371 }
372}
373
374fn visit_files<F>(dir: &Path, f: &mut F) -> Result<()>
376where
377 F: FnMut(&Path, u64) -> Result<()>,
378{
379 for entry in std::fs::read_dir(dir)? {
380 let entry = entry?;
381 let path = entry.path();
382 let metadata = std::fs::symlink_metadata(&path)?;
383 let file_type = metadata.file_type();
384
385 if file_type.is_symlink() {
386 continue;
387 }
388
389 if file_type.is_dir() {
390 visit_files(&path, f)?;
391 } else if file_type.is_file() {
392 f(&path, metadata.len())?;
393 }
394 }
395 Ok(())
396}
397
/// Formats a byte count using binary (1024-based) units with one decimal
/// place, e.g. `"1.5 MB"`; values below 1024 are shown as whole `"N bytes"`.
fn format_bytes(bytes: u64) -> String {
    // Largest unit first so the first match is the most appropriate one.
    const UNITS: [(u64, &str); 3] = [(1 << 30, "GB"), (1 << 20, "MB"), (1 << 10, "KB")];

    UNITS
        .iter()
        .find(|&&(threshold, _)| bytes >= threshold)
        .map(|&(threshold, suffix)| format!("{:.1} {}", bytes as f64 / threshold as f64, suffix))
        .unwrap_or_else(|| format!("{} bytes", bytes))
}
414
#[cfg(test)]
mod tests {
    use super::*;
    use frankensqlite::Connection;

    #[test]
    fn test_size_estimate_from_plaintext() {
        let estimate = SizeEstimate::from_plaintext_size(
            10 * 1024 * 1024, // 10 MiB of plaintext
            100,
            5000,
        )
        .unwrap();

        assert!(estimate.compressed_bytes < estimate.plaintext_bytes);
        assert_eq!(estimate.conversation_count, 100);
        assert_eq!(estimate.message_count, 5000);
        assert!(estimate.chunk_count >= 1);
    }

    #[test]
    fn test_size_estimate_empty() {
        let estimate = SizeEstimate::from_plaintext_size(0, 0, 0).unwrap();
        assert_eq!(estimate.plaintext_bytes, 0);
        // An empty payload is still counted as one chunk.
        assert_eq!(estimate.chunk_count, 1);
        assert_eq!(estimate.static_assets_bytes, STATIC_ASSETS_SIZE);
    }

    #[test]
    fn test_size_limit_ok() {
        let estimate = SizeEstimate::from_plaintext_size(
            100 * 1024 * 1024, // 100 MiB of plaintext
            100,
            5000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(result.is_ok());
    }

    #[test]
    fn test_size_limit_warning() {
        // 2000 MiB of plaintext is estimated at roughly 902 MiB in total:
        // above the 900 MiB warning threshold and below the 1 GiB limit.
        let estimate = SizeEstimate::from_plaintext_size(
            2000 * 1024 * 1024,
            1000,
            50000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(
            result.is_warning(),
            "expected a warning in the 900 MiB..1 GiB band, got {result:?}"
        );
    }

    #[test]
    fn test_size_limit_exceeded() {
        let estimate = SizeEstimate::from_plaintext_size(
            3000 * 1024 * 1024, // estimated well above 1 GiB
            5000,
            250000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(result.is_error());
    }

    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(500), "500 bytes");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
        assert_eq!(format_bytes(1536 * 1024), "1.5 MB");
    }

    #[test]
    fn test_format_display() {
        let estimate = SizeEstimate::from_plaintext_size(10 * 1024 * 1024, 50, 2500).unwrap();

        let display = estimate.format_display();
        assert!(display.contains("Estimated bundle size"));
        assert!(display.contains("Conversations: 50"));
        assert!(display.contains("Messages: 2500"));
    }

    #[test]
    fn test_from_database_filters_agents_through_agents_table() -> Result<()> {
        let temp = tempfile::TempDir::new()?;
        let db_path = temp.path().join("cass.db");
        let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
        conn.execute_batch(
            "CREATE TABLE agents (
                id INTEGER PRIMARY KEY,
                slug TEXT NOT NULL
            );
            CREATE TABLE conversations (
                id INTEGER PRIMARY KEY,
                agent_id INTEGER NOT NULL,
                started_at INTEGER
            );
            CREATE TABLE messages (
                id INTEGER PRIMARY KEY,
                conversation_id INTEGER NOT NULL,
                content TEXT NOT NULL
            );
            INSERT INTO agents (id, slug) VALUES (1, 'claude'), (2, 'codex');
            INSERT INTO conversations (id, agent_id, started_at)
                VALUES (10, 1, 1000), (20, 2, 2000);
            INSERT INTO messages (id, conversation_id, content)
                VALUES (100, 10, 'hello'), (200, 20, 'rust code');",
        )?;

        let all = SizeEstimate::from_database(&db_path, None, None, None)?;
        assert_eq!(all.conversation_count, 2);
        assert_eq!(all.message_count, 2);
        assert_eq!(all.plaintext_bytes, 14);

        let codex =
            SizeEstimate::from_database(&db_path, Some(&["codex".to_string()]), None, None)?;
        assert_eq!(codex.conversation_count, 1);
        assert_eq!(codex.message_count, 1);
        assert_eq!(codex.plaintext_bytes, 9);

        let empty_agent_filter = SizeEstimate::from_database(&db_path, Some(&[]), None, None)?;
        assert_eq!(empty_agent_filter.conversation_count, 0);
        assert_eq!(empty_agent_filter.message_count, 0);
        assert_eq!(empty_agent_filter.plaintext_bytes, 0);

        let recent = SizeEstimate::from_database(&db_path, None, Some(1500), None)?;
        assert_eq!(recent.conversation_count, 1);
        assert_eq!(recent.message_count, 1);
        assert_eq!(recent.plaintext_bytes, 9);

        Ok(())
    }

    #[test]
    fn test_from_database_allows_read_only_source_db() -> Result<()> {
        let temp = tempfile::TempDir::new()?;
        let db_path = temp.path().join("cass-read-only.db");
        let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
        conn.execute_batch(
            "CREATE TABLE agents (
                id INTEGER PRIMARY KEY,
                slug TEXT NOT NULL
            );
            CREATE TABLE conversations (
                id INTEGER PRIMARY KEY,
                agent_id INTEGER NOT NULL,
                started_at INTEGER
            );
            CREATE TABLE messages (
                id INTEGER PRIMARY KEY,
                conversation_id INTEGER NOT NULL,
                content TEXT NOT NULL
            );
            INSERT INTO agents (id, slug) VALUES (1, 'claude');
            INSERT INTO conversations (id, agent_id, started_at) VALUES (10, 1, 1000);
            INSERT INTO messages (id, conversation_id, content) VALUES (100, 10, 'readonly');",
        )?;
        drop(conn);

        let original_permissions = std::fs::metadata(&db_path)?.permissions();
        let mut read_only_permissions = original_permissions.clone();
        read_only_permissions.set_readonly(true);
        std::fs::set_permissions(&db_path, read_only_permissions)?;

        let estimate = SizeEstimate::from_database(&db_path, None, None, None);

        // Restore permissions before propagating any failure so the temporary
        // directory can always be cleaned up.
        std::fs::set_permissions(&db_path, original_permissions)?;
        let estimate = estimate?;

        assert_eq!(estimate.conversation_count, 1);
        assert_eq!(estimate.message_count, 1);
        assert_eq!(estimate.plaintext_bytes, 8);
        Ok(())
    }

    #[test]
    fn test_size_error_display() {
        let err = SizeError::TotalExceedsLimit {
            actual: 2 * 1024 * 1024 * 1024,
            limit: 1024 * 1024 * 1024,
        };

        let msg = err.to_string();
        assert!(msg.contains("2.0 GB"));
        assert!(msg.contains("1.0 GB"));
        assert!(msg.contains("Suggestions"));
    }

    #[test]
    fn test_size_error_display_and_source_are_preserved() {
        let cases = vec![
            (
                SizeError::TotalExceedsLimit {
                    actual: 2 * 1024 * 1024 * 1024,
                    limit: 1024 * 1024 * 1024,
                },
                "Total size (2.0 GB) exceeds GitHub Pages limit (1.0 GB)\n\n\
                 Suggestions:\n\
                 • Use --since \"90 days ago\" for recent conversations only\n\
                 • Use --agents <name> to limit to specific agents\n\
                 • Use --workspaces <path> to limit projects",
            ),
            (
                SizeError::FileExceedsLimit {
                    path: "site/archive.bin".to_string(),
                    actual: 150 * 1024 * 1024,
                    limit: 100 * 1024 * 1024,
                },
                "File site/archive.bin (150.0 MB) exceeds limit (100.0 MB)",
            ),
        ];

        for (error, expected_display) in cases {
            assert_eq!(error.to_string(), expected_display);
            assert!(std::error::Error::source(&error).is_none());
        }
    }

    #[test]
    fn test_bundle_verifier() {
        use tempfile::TempDir;

        let temp = TempDir::new().unwrap();

        std::fs::write(temp.path().join("small.txt"), vec![0u8; 1000]).unwrap();
        std::fs::write(temp.path().join("medium.txt"), vec![0u8; 10000]).unwrap();

        let warnings = BundleVerifier::verify(temp.path()).unwrap();
        // Both files are far below every threshold.
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_chunk_count_ceiling_division() {
        let estimate = SizeEstimate::from_plaintext_size(1000, 1, 10).unwrap();
        assert_eq!(estimate.chunk_count, 1, "Small data should be 1 chunk");

        // Plaintext that compresses to (just under) exactly one chunk.
        let one_chunk_plaintext = (DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(one_chunk_plaintext, 10, 100).unwrap();
        assert_eq!(
            estimate.chunk_count, 1,
            "Exactly one chunk's worth should be 1 chunk, not 2"
        );

        // An extra ~1 MB of plaintext adds ~450 KB compressed, spilling into a
        // second chunk.
        let over_one_chunk = one_chunk_plaintext + 1000000;
        let estimate = SizeEstimate::from_plaintext_size(over_one_chunk, 10, 100).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Just over one chunk's worth should round up to 2 chunks"
        );

        let two_chunks_plaintext = (2.0 * DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(two_chunks_plaintext, 100, 1000).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Exactly two chunks' worth should be 2 chunks, not 3"
        );
    }

    #[test]
    fn test_from_plaintext_size_handles_extremely_large_inputs() {
        let estimate = SizeEstimate::from_plaintext_size(u64::MAX, 1, 1).unwrap();
        assert_eq!(estimate.chunk_count, u32::MAX);
        assert!(estimate.total_site_bytes >= estimate.compressed_bytes);
    }

    #[test]
    #[cfg(unix)]
    fn test_visit_files_skips_symlink_paths() {
        use std::collections::HashSet;
        use std::os::unix::fs::symlink;
        use tempfile::TempDir;

        let src = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        std::fs::write(src.path().join("root.txt"), "root").unwrap();
        std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
        std::fs::create_dir_all(outside.path().join("nested")).unwrap();
        std::fs::write(outside.path().join("nested/hidden.txt"), "hidden").unwrap();

        symlink(
            outside.path().join("secret.txt"),
            src.path().join("linked-file.txt"),
        )
        .unwrap();
        symlink(outside.path().join("nested"), src.path().join("linked-dir")).unwrap();

        let mut visited = HashSet::new();
        visit_files(src.path(), &mut |path, _size| {
            visited.insert(
                path.strip_prefix(src.path())
                    .unwrap()
                    .to_string_lossy()
                    .to_string(),
            );
            Ok(())
        })
        .unwrap();

        assert!(visited.contains("root.txt"));
        assert!(!visited.contains("linked-file.txt"));
        assert!(!visited.iter().any(|p| p.starts_with("linked-dir/")));
    }
}