1use anyhow::{Context, Result, bail};
7use frankensqlite::Row;
8use frankensqlite::compat::{ConnectionExt, ParamValue, RowExt};
9use serde::{Deserialize, Serialize};
10use std::path::Path;
11
/// Maximum total size of a site bundle, in bytes (1 GiB).
///
/// The error and warning messages in this file describe this as the GitHub
/// Pages limit.
pub const MAX_SITE_SIZE_BYTES: u64 = 1024 * 1024 * 1024;

/// Total bundle size (900 MiB) above which an "approaching limit" warning is
/// produced.
pub const SITE_SIZE_WARNING_BYTES: u64 = 900 * 1024 * 1024;

/// Maximum size of any single file in a bundle, in bytes (100 MiB).
pub const MAX_FILE_SIZE_BYTES: u64 = 100 * 1024 * 1024;

/// Single-file size (50 MiB) above which a large-file warning is produced.
pub const FILE_SIZE_WARNING_BYTES: u64 = 50 * 1024 * 1024;

/// Chunk size (8 MiB) assumed when estimating how many chunks a payload needs.
pub const DEFAULT_CHUNK_SIZE: u64 = 8 * 1024 * 1024;

/// Bytes of AEAD overhead added to the estimate for each chunk.
pub const AEAD_TAG_OVERHEAD: u64 = 16;

/// Fixed allowance (2 MiB) added to every estimate for static assets.
pub const STATIC_ASSETS_SIZE: u64 = 2 * 1024 * 1024;

/// Assumed ratio of compressed size to plaintext size used by the estimator.
pub const COMPRESSION_RATIO: f64 = 0.45;
35
/// Estimated size breakdown for a site bundle.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SizeEstimate {
    /// Size of the uncompressed message content.
    pub plaintext_bytes: u64,
    /// Estimated size after compression (`plaintext_bytes * COMPRESSION_RATIO`).
    pub compressed_bytes: u64,
    /// Estimated size after encryption: compressed size plus per-chunk AEAD overhead.
    pub encrypted_bytes: u64,
    /// Fixed allowance for static assets (`STATIC_ASSETS_SIZE`).
    pub static_assets_bytes: u64,
    /// Estimated total: encrypted payload plus static assets.
    pub total_site_bytes: u64,
    /// Estimated number of payload chunks (at least 1).
    pub chunk_count: u32,
    /// Number of conversations included in the estimate.
    pub conversation_count: u64,
    /// Number of messages included in the estimate.
    pub message_count: u64,
}
56
57impl SizeEstimate {
58 pub fn from_database<P: AsRef<Path>>(
60 db_path: P,
61 agents: Option<&[String]>,
62 since_ts: Option<i64>,
63 until_ts: Option<i64>,
64 ) -> Result<Self> {
65 let conn = super::open_existing_sqlite_db(db_path.as_ref())
66 .context("Failed to open database for size estimation")?;
67
68 conn.execute("PRAGMA busy_timeout = 5000;")?;
69
70 let mut conditions = Vec::new();
72 let mut param_values: Vec<ParamValue> = Vec::new();
73
74 if let Some(agents) = agents {
75 if agents.is_empty() {
76 conditions.push("1=0".to_string());
77 } else {
78 let placeholders: Vec<_> = agents.iter().map(|_| "?").collect();
79 conditions.push(format!(
83 "c.agent_id IN (SELECT a.id FROM agents a WHERE a.slug IN ({}))",
84 placeholders.join(", ")
85 ));
86 for agent in agents {
87 param_values.push(ParamValue::from(agent.as_str()));
88 }
89 }
90 }
91
92 if let Some(since) = since_ts {
93 conditions.push("c.started_at >= ?".to_string());
94 param_values.push(ParamValue::from(since));
95 }
96
97 if let Some(until) = until_ts {
98 conditions.push("c.started_at <= ?".to_string());
99 param_values.push(ParamValue::from(until));
100 }
101
102 let where_clause = if conditions.is_empty() {
103 String::new()
104 } else {
105 format!(" WHERE {}", conditions.join(" AND "))
106 };
107
108 let params_slice = ¶m_values;
109
110 let conv_sql = format!("SELECT COUNT(*) FROM conversations c{}", where_clause);
117 let conversation_count: u64 = conn
118 .query_row_map(&conv_sql, params_slice, |row: &Row| {
119 row.get_typed::<Option<i64>>(0)
120 .map(|opt| opt.unwrap_or(0).max(0) as u64)
121 })
122 .with_context(|| {
123 format!("Failed to count conversations for size estimate: {conv_sql}")
124 })?;
125
126 let msg_sql = format!(
128 "SELECT COUNT(*), SUM(LENGTH(m.content))
129 FROM messages m
130 JOIN conversations c ON m.conversation_id = c.id
131 {}",
132 where_clause
133 );
134 let (message_count, plaintext_bytes): (u64, u64) = conn
135 .query_row_map(&msg_sql, params_slice, |row: &Row| {
136 let raw_message_count = row.get_typed::<i64>(0).unwrap_or(0);
137 let raw_plaintext_bytes = row.get_typed::<Option<i64>>(1)?.unwrap_or(0);
138 Ok((
139 raw_message_count.max(0) as u64,
140 raw_plaintext_bytes.max(0) as u64,
141 ))
142 })
143 .with_context(|| format!("Failed to estimate message payload size: {msg_sql}"))?;
144
145 Self::from_plaintext_size(plaintext_bytes, conversation_count, message_count)
146 }
147
148 pub fn from_plaintext_size(
150 plaintext_bytes: u64,
151 conversation_count: u64,
152 message_count: u64,
153 ) -> Result<Self> {
154 let compressed_bytes = (plaintext_bytes as f64 * COMPRESSION_RATIO) as u64;
156
157 let chunk_count_u64 = compressed_bytes.div_ceil(DEFAULT_CHUNK_SIZE).max(1);
159 let chunk_count = u32::try_from(chunk_count_u64).unwrap_or(u32::MAX);
160
161 let aead_overhead = u64::from(chunk_count)
163 .checked_mul(AEAD_TAG_OVERHEAD)
164 .ok_or_else(|| anyhow::anyhow!("AEAD overhead overflow"))?;
165 let encrypted_bytes = compressed_bytes
166 .checked_add(aead_overhead)
167 .ok_or_else(|| anyhow::anyhow!("Encrypted size overflow"))?;
168
169 let total_site_bytes = encrypted_bytes
171 .checked_add(STATIC_ASSETS_SIZE)
172 .ok_or_else(|| anyhow::anyhow!("Total site size overflow"))?;
173
174 Ok(Self {
175 plaintext_bytes,
176 compressed_bytes,
177 encrypted_bytes,
178 static_assets_bytes: STATIC_ASSETS_SIZE,
179 total_site_bytes,
180 chunk_count,
181 conversation_count,
182 message_count,
183 })
184 }
185
186 pub fn check_limits(&self) -> SizeLimitResult {
188 if self.total_site_bytes > MAX_SITE_SIZE_BYTES {
189 return SizeLimitResult::ExceedsLimit(SizeError::TotalExceedsLimit {
190 actual: self.total_site_bytes,
191 limit: MAX_SITE_SIZE_BYTES,
192 });
193 }
194
195 if self.total_site_bytes > SITE_SIZE_WARNING_BYTES {
196 return SizeLimitResult::Warning(SizeWarning::ApproachingLimit {
197 actual: self.total_site_bytes,
198 limit: MAX_SITE_SIZE_BYTES,
199 percentage: (self.total_site_bytes as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0)
200 as u8,
201 });
202 }
203
204 SizeLimitResult::Ok
205 }
206
207 pub fn format_display(&self) -> String {
209 format!(
210 "Estimated bundle size: {}\n\
211 • Payload: {} ({} chunks × {} max)\n\
212 • Static assets: {}\n\
213 • Compression ratio: ~{:.0}%\n\
214 • Conversations: {}\n\
215 • Messages: {}",
216 format_bytes(self.total_site_bytes),
217 format_bytes(self.encrypted_bytes),
218 self.chunk_count,
219 format_bytes(DEFAULT_CHUNK_SIZE),
220 format_bytes(self.static_assets_bytes),
221 COMPRESSION_RATIO * 100.0,
222 self.conversation_count,
223 self.message_count,
224 )
225 }
226}
227
228#[derive(Debug, Clone)]
230pub enum SizeLimitResult {
231 Ok,
233 Warning(SizeWarning),
235 ExceedsLimit(SizeError),
237}
238
239impl SizeLimitResult {
240 pub fn is_ok(&self) -> bool {
242 matches!(self, SizeLimitResult::Ok)
243 }
244
245 pub fn is_warning(&self) -> bool {
247 matches!(self, SizeLimitResult::Warning(_))
248 }
249
250 pub fn is_error(&self) -> bool {
252 matches!(self, SizeLimitResult::ExceedsLimit(_))
253 }
254}
255
/// Hard size-limit violations.
///
/// The `Display` text formats byte counts with `format_bytes`.
#[derive(Debug, Clone, thiserror::Error)]
pub enum SizeError {
    /// The total size is over the site limit; the message includes
    /// suggestions for narrowing the export.
    #[error(
        "Total size ({}) exceeds GitHub Pages limit ({})\n\n\
         Suggestions:\n\
         • Use --since \"90 days ago\" for recent conversations only\n\
         • Use --agents <name> to limit to specific agents\n\
         • Use --workspaces <path> to limit projects",
        format_bytes(*actual),
        format_bytes(*limit)
    )]
    TotalExceedsLimit { actual: u64, limit: u64 },
    /// A single file is over the per-file limit.
    #[error("File {path} ({}) exceeds limit ({})", format_bytes(*actual), format_bytes(*limit))]
    FileExceedsLimit {
        path: String,
        actual: u64,
        limit: u64,
    },
}
278
279#[derive(Debug, Clone)]
281pub enum SizeWarning {
282 ApproachingLimit {
284 actual: u64,
285 limit: u64,
286 percentage: u8,
287 },
288 LargeFile { path: String, size: u64 },
290}
291
292impl std::fmt::Display for SizeWarning {
293 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
294 match self {
295 SizeWarning::ApproachingLimit {
296 actual,
297 limit,
298 percentage,
299 } => {
300 write!(
301 f,
302 "Estimated size {} is {}% of GitHub Pages limit ({})",
303 format_bytes(*actual),
304 percentage,
305 format_bytes(*limit)
306 )
307 }
308 SizeWarning::LargeFile { path, size } => {
309 write!(f, "Large file: {} ({})", path, format_bytes(*size))
310 }
311 }
312 }
313}
314
315pub struct BundleVerifier;
317
318impl BundleVerifier {
319 pub fn verify<P: AsRef<Path>>(site_dir: P) -> Result<Vec<SizeWarning>> {
321 let site_dir = site_dir.as_ref();
322 let mut warnings = Vec::new();
323 let mut total_size = 0u64;
324
325 visit_files(site_dir, &mut |path, size| {
326 total_size += size;
327
328 if size > MAX_FILE_SIZE_BYTES {
329 bail!(
330 "File {} ({}) exceeds maximum file size ({}). Chunking may have failed.",
331 path.display(),
332 format_bytes(size),
333 format_bytes(MAX_FILE_SIZE_BYTES)
334 );
335 }
336
337 if size > FILE_SIZE_WARNING_BYTES {
338 let rel_path = path
339 .strip_prefix(site_dir)
340 .unwrap_or(path)
341 .to_string_lossy()
342 .to_string();
343 warnings.push(SizeWarning::LargeFile {
344 path: rel_path,
345 size,
346 });
347 }
348
349 Ok(())
350 })?;
351
352 if total_size > MAX_SITE_SIZE_BYTES {
353 bail!(
354 "Total bundle size ({}) exceeds GitHub Pages limit ({})",
355 format_bytes(total_size),
356 format_bytes(MAX_SITE_SIZE_BYTES)
357 );
358 }
359
360 if total_size > SITE_SIZE_WARNING_BYTES {
361 warnings.push(SizeWarning::ApproachingLimit {
362 actual: total_size,
363 limit: MAX_SITE_SIZE_BYTES,
364 percentage: (total_size as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0) as u8,
365 });
366 }
367
368 Ok(warnings)
369 }
370}
371
372fn visit_files<F>(dir: &Path, f: &mut F) -> Result<()>
374where
375 F: FnMut(&Path, u64) -> Result<()>,
376{
377 for entry in std::fs::read_dir(dir)? {
378 let entry = entry?;
379 let path = entry.path();
380 let metadata = std::fs::symlink_metadata(&path)?;
381 let file_type = metadata.file_type();
382
383 if file_type.is_symlink() {
384 continue;
385 }
386
387 if file_type.is_dir() {
388 visit_files(&path, f)?;
389 } else if file_type.is_file() {
390 f(&path, metadata.len())?;
391 }
392 }
393 Ok(())
394}
395
/// Formats a byte count for display using 1024-based units.
///
/// Values below 1024 are printed as whole bytes; larger values are printed
/// with one decimal place in `KB`, `MB` or `GB`. A value that would round to
/// `1024.0` in its unit is shown as `1.0` of the next larger unit instead, so
/// the output never reads `1024.0 KB` or `1024.0 MB`.
fn format_bytes(bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = 1024 * KB;
    const GB: u64 = 1024 * MB;

    if bytes < KB {
        return format!("{} bytes", bytes);
    }

    // Pick the largest unit that fits, remembering the next one up (if any).
    let (unit, suffix, next_suffix) = if bytes >= GB {
        (GB, "GB", None)
    } else if bytes >= MB {
        (MB, "MB", Some("GB"))
    } else {
        (KB, "KB", Some("MB"))
    };

    let scaled = format!("{:.1}", bytes as f64 / unit as f64);
    match next_suffix {
        // Below the next unit the scaled value is < 1024, so the only way
        // rounding can overshoot is by printing exactly "1024.0".
        Some(next) if scaled == "1024.0" => format!("1.0 {}", next),
        _ => format!("{} {}", scaled, suffix),
    }
}
412
#[cfg(test)]
mod tests {
    use super::*;
    use frankensqlite::Connection;

    #[test]
    fn test_size_estimate_from_plaintext() {
        let estimate = SizeEstimate::from_plaintext_size(
            10 * 1024 * 1024,
            100,
            5000,
        )
        .unwrap();

        assert!(estimate.compressed_bytes < estimate.plaintext_bytes);
        assert_eq!(estimate.conversation_count, 100);
        assert_eq!(estimate.message_count, 5000);
        assert!(estimate.chunk_count >= 1);
    }

    #[test]
    fn test_size_estimate_empty() {
        let estimate = SizeEstimate::from_plaintext_size(0, 0, 0).unwrap();
        assert_eq!(estimate.plaintext_bytes, 0);
        // An empty payload is still reported as one chunk.
        assert_eq!(estimate.chunk_count, 1);
        assert_eq!(estimate.static_assets_bytes, STATIC_ASSETS_SIZE);
    }

    #[test]
    fn test_size_limit_ok() {
        let estimate = SizeEstimate::from_plaintext_size(
            100 * 1024 * 1024,
            100,
            5000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(result.is_ok());
    }

    #[test]
    fn test_size_limit_warning() {
        // 2000 MiB of plaintext compresses to an estimated 900 MiB; adding
        // the per-chunk overhead and static assets puts the total above the
        // 900 MiB warning threshold but below the 1 GiB limit.
        let estimate = SizeEstimate::from_plaintext_size(
            2000 * 1024 * 1024,
            1000,
            50000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(result.is_warning());
    }

    #[test]
    fn test_size_limit_exceeded() {
        let estimate = SizeEstimate::from_plaintext_size(
            3000 * 1024 * 1024,
            5000,
            250000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(result.is_error());
    }

    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(500), "500 bytes");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
        assert_eq!(format_bytes(1536 * 1024), "1.5 MB");
    }

    #[test]
    fn test_format_display() {
        let estimate = SizeEstimate::from_plaintext_size(10 * 1024 * 1024, 50, 2500).unwrap();

        let display = estimate.format_display();
        assert!(display.contains("Estimated bundle size"));
        assert!(display.contains("Conversations: 50"));
        assert!(display.contains("Messages: 2500"));
    }

    #[test]
    fn test_from_database_filters_agents_through_agents_table() -> Result<()> {
        let temp = tempfile::TempDir::new()?;
        let db_path = temp.path().join("cass.db");
        let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
        conn.execute_batch(
            "CREATE TABLE agents (
                id INTEGER PRIMARY KEY,
                slug TEXT NOT NULL
            );
            CREATE TABLE conversations (
                id INTEGER PRIMARY KEY,
                agent_id INTEGER NOT NULL,
                started_at INTEGER
            );
            CREATE TABLE messages (
                id INTEGER PRIMARY KEY,
                conversation_id INTEGER NOT NULL,
                content TEXT NOT NULL
            );
            INSERT INTO agents (id, slug) VALUES (1, 'claude'), (2, 'codex');
            INSERT INTO conversations (id, agent_id, started_at)
                VALUES (10, 1, 1000), (20, 2, 2000);
            INSERT INTO messages (id, conversation_id, content)
                VALUES (100, 10, 'hello'), (200, 20, 'rust code');",
        )?;

        let all = SizeEstimate::from_database(&db_path, None, None, None)?;
        assert_eq!(all.conversation_count, 2);
        assert_eq!(all.message_count, 2);
        assert_eq!(all.plaintext_bytes, 14);

        let codex =
            SizeEstimate::from_database(&db_path, Some(&["codex".to_string()]), None, None)?;
        assert_eq!(codex.conversation_count, 1);
        assert_eq!(codex.message_count, 1);
        assert_eq!(codex.plaintext_bytes, 9);

        let empty_agent_filter = SizeEstimate::from_database(&db_path, Some(&[]), None, None)?;
        assert_eq!(empty_agent_filter.conversation_count, 0);
        assert_eq!(empty_agent_filter.message_count, 0);
        assert_eq!(empty_agent_filter.plaintext_bytes, 0);

        let recent = SizeEstimate::from_database(&db_path, None, Some(1500), None)?;
        assert_eq!(recent.conversation_count, 1);
        assert_eq!(recent.message_count, 1);
        assert_eq!(recent.plaintext_bytes, 9);

        Ok(())
    }

    #[test]
    fn test_from_database_allows_read_only_source_db() -> Result<()> {
        let temp = tempfile::TempDir::new()?;
        let db_path = temp.path().join("cass-read-only.db");
        let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
        conn.execute_batch(
            "CREATE TABLE agents (
                id INTEGER PRIMARY KEY,
                slug TEXT NOT NULL
            );
            CREATE TABLE conversations (
                id INTEGER PRIMARY KEY,
                agent_id INTEGER NOT NULL,
                started_at INTEGER
            );
            CREATE TABLE messages (
                id INTEGER PRIMARY KEY,
                conversation_id INTEGER NOT NULL,
                content TEXT NOT NULL
            );
            INSERT INTO agents (id, slug) VALUES (1, 'claude');
            INSERT INTO conversations (id, agent_id, started_at) VALUES (10, 1, 1000);
            INSERT INTO messages (id, conversation_id, content) VALUES (100, 10, 'readonly');",
        )?;
        drop(conn);

        let original_permissions = std::fs::metadata(&db_path)?.permissions();
        let mut read_only_permissions = original_permissions.clone();
        read_only_permissions.set_readonly(true);
        std::fs::set_permissions(&db_path, read_only_permissions)?;

        let estimate = SizeEstimate::from_database(&db_path, None, None, None);

        // Restore permissions before inspecting the result so the temporary
        // directory can be cleaned up even when the estimate failed.
        std::fs::set_permissions(&db_path, original_permissions)?;
        let estimate = estimate?;

        assert_eq!(estimate.conversation_count, 1);
        assert_eq!(estimate.message_count, 1);
        assert_eq!(estimate.plaintext_bytes, 8);
        Ok(())
    }

    #[test]
    fn test_size_error_display() {
        let err = SizeError::TotalExceedsLimit {
            actual: 2 * 1024 * 1024 * 1024,
            limit: 1024 * 1024 * 1024,
        };

        let msg = err.to_string();
        assert!(msg.contains("2.0 GB"));
        assert!(msg.contains("1.0 GB"));
        assert!(msg.contains("Suggestions"));
    }

    #[test]
    fn test_size_error_display_and_source_are_preserved() {
        let cases = vec![
            (
                SizeError::TotalExceedsLimit {
                    actual: 2 * 1024 * 1024 * 1024,
                    limit: 1024 * 1024 * 1024,
                },
                "Total size (2.0 GB) exceeds GitHub Pages limit (1.0 GB)\n\n\
                 Suggestions:\n\
                 • Use --since \"90 days ago\" for recent conversations only\n\
                 • Use --agents <name> to limit to specific agents\n\
                 • Use --workspaces <path> to limit projects",
            ),
            (
                SizeError::FileExceedsLimit {
                    path: "site/archive.bin".to_string(),
                    actual: 150 * 1024 * 1024,
                    limit: 100 * 1024 * 1024,
                },
                "File site/archive.bin (150.0 MB) exceeds limit (100.0 MB)",
            ),
        ];

        for (error, expected_display) in cases {
            assert_eq!(error.to_string(), expected_display);
            assert!(std::error::Error::source(&error).is_none());
        }
    }

    #[test]
    fn test_bundle_verifier() {
        use tempfile::TempDir;

        let temp = TempDir::new().unwrap();

        std::fs::write(temp.path().join("small.txt"), vec![0u8; 1000]).unwrap();
        std::fs::write(temp.path().join("medium.txt"), vec![0u8; 10000]).unwrap();

        let warnings = BundleVerifier::verify(temp.path()).unwrap();
        // Both files are far below every threshold.
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_chunk_count_ceiling_division() {
        let estimate = SizeEstimate::from_plaintext_size(1000, 1, 10).unwrap();
        assert_eq!(estimate.chunk_count, 1, "Small data should be 1 chunk");

        let one_chunk_plaintext = (DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(one_chunk_plaintext, 10, 100).unwrap();
        assert_eq!(
            estimate.chunk_count, 1,
            "Exactly one chunk's worth should be 1 chunk, not 2"
        );

        // Roughly 450 KB of extra compressed data spills into a second chunk.
        let over_one_chunk = one_chunk_plaintext + 1000000;
        let estimate = SizeEstimate::from_plaintext_size(over_one_chunk, 10, 100).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Just over one chunk should round up to 2 chunks"
        );

        let two_chunks_plaintext = (2.0 * DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(two_chunks_plaintext, 100, 1000).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Exactly two chunks' worth should be 2 chunks, not 3"
        );
    }

    #[test]
    fn test_from_plaintext_size_handles_extremely_large_inputs() {
        let estimate = SizeEstimate::from_plaintext_size(u64::MAX, 1, 1).unwrap();
        assert_eq!(estimate.chunk_count, u32::MAX);
        assert!(estimate.total_site_bytes >= estimate.compressed_bytes);
    }

    #[test]
    #[cfg(unix)]
    fn test_visit_files_skips_symlink_paths() {
        use std::collections::HashSet;
        use std::os::unix::fs::symlink;
        use tempfile::TempDir;

        let src = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        std::fs::write(src.path().join("root.txt"), "root").unwrap();
        std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
        std::fs::create_dir_all(outside.path().join("nested")).unwrap();
        std::fs::write(outside.path().join("nested/hidden.txt"), "hidden").unwrap();

        symlink(
            outside.path().join("secret.txt"),
            src.path().join("linked-file.txt"),
        )
        .unwrap();
        symlink(outside.path().join("nested"), src.path().join("linked-dir")).unwrap();

        let mut visited = HashSet::new();
        visit_files(src.path(), &mut |path, _size| {
            visited.insert(
                path.strip_prefix(src.path())
                    .unwrap()
                    .to_string_lossy()
                    .to_string(),
            );
            Ok(())
        })
        .unwrap();

        assert!(visited.contains("root.txt"));
        assert!(!visited.contains("linked-file.txt"));
        assert!(!visited.iter().any(|p| p.starts_with("linked-dir/")));
    }
}