1use anyhow::{Context, Result, bail};
7use frankensqlite::Connection;
8use frankensqlite::Row;
9use frankensqlite::compat::{ConnectionExt, ParamValue, RowExt};
10use serde::{Deserialize, Serialize};
11use std::path::Path;
12
/// One mebibyte in bytes; the unit the byte limits below are expressed in.
const MIB: u64 = 1 << 20;

/// Hard ceiling on the total site size (1 GiB). User-facing messages in this
/// file describe it as the GitHub Pages limit.
pub const MAX_SITE_SIZE_BYTES: u64 = 1024 * MIB;

/// Total site size above which an "approaching limit" warning is produced.
pub const SITE_SIZE_WARNING_BYTES: u64 = 900 * MIB;

/// Largest size a single file in the bundle may have before verification fails.
pub const MAX_FILE_SIZE_BYTES: u64 = 100 * MIB;

/// Single-file size above which a "large file" warning is produced.
pub const FILE_SIZE_WARNING_BYTES: u64 = 50 * MIB;

/// Chunk size used when estimating how many chunks the payload splits into.
pub const DEFAULT_CHUNK_SIZE: u64 = 8 * MIB;

/// Bytes of AEAD tag overhead added per chunk in the size estimate.
pub const AEAD_TAG_OVERHEAD: u64 = 16;

/// Fixed allowance for static assets that is added to every estimate.
pub const STATIC_ASSETS_SIZE: u64 = 2 * MIB;

/// Factor applied to the plaintext size to estimate the compressed size.
pub const COMPRESSION_RATIO: f64 = 0.45;
36
/// Estimated size breakdown of an exported site bundle.
///
/// Values are produced by [`SizeEstimate::from_plaintext_size`] (directly or via
/// [`SizeEstimate::from_database`]) and are estimates, not measurements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SizeEstimate {
    /// Size of the message content before compression.
    pub plaintext_bytes: u64,
    /// Estimated size after compression (`plaintext_bytes * COMPRESSION_RATIO`).
    pub compressed_bytes: u64,
    /// Compressed size plus `AEAD_TAG_OVERHEAD` bytes for every chunk.
    pub encrypted_bytes: u64,
    /// Fixed allowance for static assets (`STATIC_ASSETS_SIZE`).
    pub static_assets_bytes: u64,
    /// Sum of `encrypted_bytes` and `static_assets_bytes`.
    pub total_site_bytes: u64,
    /// Number of `DEFAULT_CHUNK_SIZE` chunks the compressed payload needs; at least 1.
    pub chunk_count: u32,
    /// Number of conversations covered by the estimate.
    pub conversation_count: u64,
    /// Number of messages covered by the estimate.
    pub message_count: u64,
}
57
58impl SizeEstimate {
59 pub fn from_database<P: AsRef<Path>>(
61 db_path: P,
62 agents: Option<&[String]>,
63 since_ts: Option<i64>,
64 until_ts: Option<i64>,
65 ) -> Result<Self> {
66 let conn = Connection::open(db_path.as_ref().to_string_lossy().as_ref())
67 .context("Failed to open database for size estimation")?;
68
69 conn.execute_batch(
70 "PRAGMA busy_timeout = 5000;
71 PRAGMA journal_mode = WAL;",
72 )?;
73
74 let mut conditions = Vec::new();
76 let mut param_values: Vec<ParamValue> = Vec::new();
77
78 if let Some(agents) = agents {
79 if agents.is_empty() {
80 conditions.push("1=0".to_string());
81 } else {
82 let placeholders: Vec<_> = agents.iter().map(|_| "?").collect();
83 conditions.push(format!(
84 "EXISTS (SELECT 1 FROM agents a WHERE a.id = c.agent_id AND a.slug IN ({}))",
85 placeholders.join(", ")
86 ));
87 for agent in agents {
88 param_values.push(ParamValue::from(agent.as_str()));
89 }
90 }
91 }
92
93 if let Some(since) = since_ts {
94 conditions.push("c.started_at >= ?".to_string());
95 param_values.push(ParamValue::from(since));
96 }
97
98 if let Some(until) = until_ts {
99 conditions.push("c.started_at <= ?".to_string());
100 param_values.push(ParamValue::from(until));
101 }
102
103 let where_clause = if conditions.is_empty() {
104 String::new()
105 } else {
106 format!(" WHERE {}", conditions.join(" AND "))
107 };
108
109 let params_slice = ¶m_values;
110
111 let conv_sql = format!("SELECT COUNT(*) FROM conversations c{}", where_clause);
113 let conversation_count: u64 = conn
114 .query_row_map(&conv_sql, params_slice, |row: &Row| {
115 row.get_typed::<i64>(0).map(|v| v.max(0) as u64)
116 })
117 .with_context(|| {
118 format!("Failed to count conversations for size estimate: {conv_sql}")
119 })?;
120
121 let msg_sql = format!(
123 "SELECT COUNT(*), SUM(LENGTH(m.content))
124 FROM messages m
125 JOIN conversations c ON m.conversation_id = c.id
126 {}",
127 where_clause
128 );
129 let (message_count, plaintext_bytes): (u64, u64) = conn
130 .query_row_map(&msg_sql, params_slice, |row: &Row| {
131 let raw_message_count = row.get_typed::<i64>(0).unwrap_or(0);
132 let raw_plaintext_bytes = row.get_typed::<Option<i64>>(1)?.unwrap_or(0);
133 Ok((
134 raw_message_count.max(0) as u64,
135 raw_plaintext_bytes.max(0) as u64,
136 ))
137 })
138 .with_context(|| format!("Failed to estimate message payload size: {msg_sql}"))?;
139
140 Self::from_plaintext_size(plaintext_bytes, conversation_count, message_count)
141 }
142
143 pub fn from_plaintext_size(
145 plaintext_bytes: u64,
146 conversation_count: u64,
147 message_count: u64,
148 ) -> Result<Self> {
149 let compressed_bytes = (plaintext_bytes as f64 * COMPRESSION_RATIO) as u64;
151
152 let chunk_count_u64 = compressed_bytes.div_ceil(DEFAULT_CHUNK_SIZE).max(1);
154 let chunk_count = u32::try_from(chunk_count_u64).unwrap_or(u32::MAX);
155
156 let aead_overhead = u64::from(chunk_count)
158 .checked_mul(AEAD_TAG_OVERHEAD)
159 .ok_or_else(|| anyhow::anyhow!("AEAD overhead overflow"))?;
160 let encrypted_bytes = compressed_bytes
161 .checked_add(aead_overhead)
162 .ok_or_else(|| anyhow::anyhow!("Encrypted size overflow"))?;
163
164 let total_site_bytes = encrypted_bytes
166 .checked_add(STATIC_ASSETS_SIZE)
167 .ok_or_else(|| anyhow::anyhow!("Total site size overflow"))?;
168
169 Ok(Self {
170 plaintext_bytes,
171 compressed_bytes,
172 encrypted_bytes,
173 static_assets_bytes: STATIC_ASSETS_SIZE,
174 total_site_bytes,
175 chunk_count,
176 conversation_count,
177 message_count,
178 })
179 }
180
181 pub fn check_limits(&self) -> SizeLimitResult {
183 if self.total_site_bytes > MAX_SITE_SIZE_BYTES {
184 return SizeLimitResult::ExceedsLimit(SizeError::TotalExceedsLimit {
185 actual: self.total_site_bytes,
186 limit: MAX_SITE_SIZE_BYTES,
187 });
188 }
189
190 if self.total_site_bytes > SITE_SIZE_WARNING_BYTES {
191 return SizeLimitResult::Warning(SizeWarning::ApproachingLimit {
192 actual: self.total_site_bytes,
193 limit: MAX_SITE_SIZE_BYTES,
194 percentage: (self.total_site_bytes as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0)
195 as u8,
196 });
197 }
198
199 SizeLimitResult::Ok
200 }
201
202 pub fn format_display(&self) -> String {
204 format!(
205 "Estimated bundle size: {}\n\
206 • Payload: {} ({} chunks × {} max)\n\
207 • Static assets: {}\n\
208 • Compression ratio: ~{:.0}%\n\
209 • Conversations: {}\n\
210 • Messages: {}",
211 format_bytes(self.total_site_bytes),
212 format_bytes(self.encrypted_bytes),
213 self.chunk_count,
214 format_bytes(DEFAULT_CHUNK_SIZE),
215 format_bytes(self.static_assets_bytes),
216 COMPRESSION_RATIO * 100.0,
217 self.conversation_count,
218 self.message_count,
219 )
220 }
221}
222
/// Outcome of checking an estimate against the site size limits.
#[derive(Debug, Clone)]
pub enum SizeLimitResult {
    /// The size is within the limits and below the warning threshold.
    Ok,
    /// The size is acceptable but a warning applies.
    Warning(SizeWarning),
    /// The size is above a hard limit.
    ExceedsLimit(SizeError),
}
233
234impl SizeLimitResult {
235 pub fn is_ok(&self) -> bool {
237 matches!(self, SizeLimitResult::Ok)
238 }
239
240 pub fn is_warning(&self) -> bool {
242 matches!(self, SizeLimitResult::Warning(_))
243 }
244
245 pub fn is_error(&self) -> bool {
247 matches!(self, SizeLimitResult::ExceedsLimit(_))
248 }
249}
250
/// Hard size-limit violations, with user-facing messages generated by `thiserror`.
#[derive(Debug, Clone, thiserror::Error)]
pub enum SizeError {
    /// The whole site is larger than `limit`; the message includes suggestions for
    /// narrowing the export.
    #[error(
        "Total size ({}) exceeds GitHub Pages limit ({})\n\n\
         Suggestions:\n\
         • Use --since \"90 days ago\" for recent conversations only\n\
         • Use --agents <name> to limit to specific agents\n\
         • Use --workspaces <path> to limit projects",
        format_bytes(*actual),
        format_bytes(*limit)
    )]
    TotalExceedsLimit { actual: u64, limit: u64 },
    /// A single file at `path` is larger than `limit`.
    #[error("File {path} ({}) exceeds limit ({})", format_bytes(*actual), format_bytes(*limit))]
    FileExceedsLimit {
        path: String,
        actual: u64,
        limit: u64,
    },
}
273
/// Non-fatal size findings.
#[derive(Debug, Clone)]
pub enum SizeWarning {
    /// The total size is close to the limit.
    ApproachingLimit {
        /// Total size in bytes.
        actual: u64,
        /// The limit in bytes that `actual` is compared against.
        limit: u64,
        /// `actual` as a whole-number percentage of `limit`.
        percentage: u8,
    },
    /// A single file is large enough to warrant attention.
    LargeFile { path: String, size: u64 },
}
286
287impl std::fmt::Display for SizeWarning {
288 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
289 match self {
290 SizeWarning::ApproachingLimit {
291 actual,
292 limit,
293 percentage,
294 } => {
295 write!(
296 f,
297 "Estimated size {} is {}% of GitHub Pages limit ({})",
298 format_bytes(*actual),
299 percentage,
300 format_bytes(*limit)
301 )
302 }
303 SizeWarning::LargeFile { path, size } => {
304 write!(f, "Large file: {} ({})", path, format_bytes(*size))
305 }
306 }
307 }
308}
309
310pub struct BundleVerifier;
312
313impl BundleVerifier {
314 pub fn verify<P: AsRef<Path>>(site_dir: P) -> Result<Vec<SizeWarning>> {
316 let site_dir = site_dir.as_ref();
317 let mut warnings = Vec::new();
318 let mut total_size = 0u64;
319
320 visit_files(site_dir, &mut |path, size| {
321 total_size += size;
322
323 if size > MAX_FILE_SIZE_BYTES {
324 bail!(
325 "File {} ({}) exceeds maximum file size ({}). Chunking may have failed.",
326 path.display(),
327 format_bytes(size),
328 format_bytes(MAX_FILE_SIZE_BYTES)
329 );
330 }
331
332 if size > FILE_SIZE_WARNING_BYTES {
333 let rel_path = path
334 .strip_prefix(site_dir)
335 .unwrap_or(path)
336 .to_string_lossy()
337 .to_string();
338 warnings.push(SizeWarning::LargeFile {
339 path: rel_path,
340 size,
341 });
342 }
343
344 Ok(())
345 })?;
346
347 if total_size > MAX_SITE_SIZE_BYTES {
348 bail!(
349 "Total bundle size ({}) exceeds GitHub Pages limit ({})",
350 format_bytes(total_size),
351 format_bytes(MAX_SITE_SIZE_BYTES)
352 );
353 }
354
355 if total_size > SITE_SIZE_WARNING_BYTES {
356 warnings.push(SizeWarning::ApproachingLimit {
357 actual: total_size,
358 limit: MAX_SITE_SIZE_BYTES,
359 percentage: (total_size as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0) as u8,
360 });
361 }
362
363 Ok(warnings)
364 }
365}
366
367fn visit_files<F>(dir: &Path, f: &mut F) -> Result<()>
369where
370 F: FnMut(&Path, u64) -> Result<()>,
371{
372 for entry in std::fs::read_dir(dir)? {
373 let entry = entry?;
374 let path = entry.path();
375 let metadata = std::fs::symlink_metadata(&path)?;
376 let file_type = metadata.file_type();
377
378 if file_type.is_symlink() {
379 continue;
380 }
381
382 if file_type.is_dir() {
383 visit_files(&path, f)?;
384 } else if file_type.is_file() {
385 f(&path, metadata.len())?;
386 }
387 }
388 Ok(())
389}
390
/// Formats a byte count for display using 1024-based units.
///
/// Values below 1024 are printed as whole bytes (`"500 bytes"`); larger values are
/// printed with one decimal place in KB, MB or GB (`"1.5 MB"`). GB is the largest
/// unit, so very large values are printed as e.g. `"2048.0 GB"`.
///
/// A value that would round to `1024.0` of a unit is shown in the next larger unit
/// instead, so `1024 * 1024 - 1` is rendered as `"1.0 MB"` rather than `"1024.0 KB"`.
fn format_bytes(bytes: u64) -> String {
    const STEP: f64 = 1024.0;
    // Smallest scaled value that `{:.1}` would print as "1024.0".
    const PROMOTE_AT: f64 = 1023.95;
    const UNITS: [&str; 3] = ["KB", "MB", "GB"];

    if bytes < 1024 {
        return format!("{} bytes", bytes);
    }

    // Dividing by 1024 is exact in binary floating point, so scaling step by step gives
    // the same value as dividing once by the unit size.
    let mut value = bytes as f64 / STEP;
    let mut unit = 0;
    while unit + 1 < UNITS.len() && value >= PROMOTE_AT {
        value /= STEP;
        unit += 1;
    }

    format!("{:.1} {}", value, UNITS[unit])
}
407
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_size_estimate_from_plaintext() {
        let estimate = SizeEstimate::from_plaintext_size(10 * 1024 * 1024, 100, 5000).unwrap();

        assert!(estimate.compressed_bytes < estimate.plaintext_bytes);
        assert_eq!(estimate.conversation_count, 100);
        assert_eq!(estimate.message_count, 5000);
        // 10 MiB * 0.45 = 4.5 MiB, which fits into a single 8 MiB chunk.
        assert_eq!(estimate.chunk_count, 1);
    }

    #[test]
    fn test_size_estimate_empty() {
        let estimate = SizeEstimate::from_plaintext_size(0, 0, 0).unwrap();
        assert_eq!(estimate.plaintext_bytes, 0);
        // An empty payload is still reported as one chunk.
        assert_eq!(estimate.chunk_count, 1);
        assert_eq!(estimate.static_assets_bytes, STATIC_ASSETS_SIZE);
    }

    #[test]
    fn test_size_limit_ok() {
        let estimate = SizeEstimate::from_plaintext_size(100 * 1024 * 1024, 100, 5000).unwrap();

        let result = estimate.check_limits();
        assert!(result.is_ok());
    }

    #[test]
    fn test_size_limit_warning() {
        // 2000 MiB * 0.45 = 900 MiB of payload; with per-chunk overhead and static assets
        // the total lands above the 900 MiB warning threshold but below the 1 GiB limit.
        let estimate =
            SizeEstimate::from_plaintext_size(2000 * 1024 * 1024, 1000, 50000).unwrap();

        let result = estimate.check_limits();
        assert!(result.is_warning());
    }

    #[test]
    fn test_size_limit_exceeded() {
        let estimate =
            SizeEstimate::from_plaintext_size(3000 * 1024 * 1024, 5000, 250000).unwrap();

        let result = estimate.check_limits();
        assert!(result.is_error());
    }

    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(500), "500 bytes");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
        assert_eq!(format_bytes(1536 * 1024), "1.5 MB");
    }

    #[test]
    fn test_format_display() {
        let estimate = SizeEstimate::from_plaintext_size(10 * 1024 * 1024, 50, 2500).unwrap();

        let display = estimate.format_display();
        assert!(display.contains("Estimated bundle size"));
        assert!(display.contains("Conversations: 50"));
        assert!(display.contains("Messages: 2500"));
    }

    #[test]
    fn test_from_database_filters_agents_through_agents_table() -> Result<()> {
        let temp = tempfile::TempDir::new()?;
        let db_path = temp.path().join("cass.db");
        let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
        conn.execute_batch(
            "CREATE TABLE agents (
                 id INTEGER PRIMARY KEY,
                 slug TEXT NOT NULL
             );
             CREATE TABLE conversations (
                 id INTEGER PRIMARY KEY,
                 agent_id INTEGER NOT NULL,
                 started_at INTEGER
             );
             CREATE TABLE messages (
                 id INTEGER PRIMARY KEY,
                 conversation_id INTEGER NOT NULL,
                 content TEXT NOT NULL
             );
             INSERT INTO agents (id, slug) VALUES (1, 'claude'), (2, 'codex');
             INSERT INTO conversations (id, agent_id, started_at)
                 VALUES (10, 1, 1000), (20, 2, 2000);
             INSERT INTO messages (id, conversation_id, content)
                 VALUES (100, 10, 'hello'), (200, 20, 'rust code');",
        )?;

        // No filters: both conversations and both messages are counted.
        let all = SizeEstimate::from_database(&db_path, None, None, None)?;
        assert_eq!(all.conversation_count, 2);
        assert_eq!(all.message_count, 2);
        assert_eq!(all.plaintext_bytes, 14);

        // Agent filter is resolved through the agents table by slug.
        let codex =
            SizeEstimate::from_database(&db_path, Some(&["codex".to_string()]), None, None)?;
        assert_eq!(codex.conversation_count, 1);
        assert_eq!(codex.message_count, 1);
        assert_eq!(codex.plaintext_bytes, 9);

        // An explicit empty agent list selects nothing.
        let empty_agent_filter = SizeEstimate::from_database(&db_path, Some(&[]), None, None)?;
        assert_eq!(empty_agent_filter.conversation_count, 0);
        assert_eq!(empty_agent_filter.message_count, 0);
        assert_eq!(empty_agent_filter.plaintext_bytes, 0);

        // Lower time bound excludes the conversation started at 1000.
        let recent = SizeEstimate::from_database(&db_path, None, Some(1500), None)?;
        assert_eq!(recent.conversation_count, 1);
        assert_eq!(recent.message_count, 1);
        assert_eq!(recent.plaintext_bytes, 9);

        Ok(())
    }

    #[test]
    fn test_size_error_display() {
        let err = SizeError::TotalExceedsLimit {
            actual: 2 * 1024 * 1024 * 1024,
            limit: 1024 * 1024 * 1024,
        };

        let msg = err.to_string();
        assert!(msg.contains("2.0 GB"));
        assert!(msg.contains("1.0 GB"));
        assert!(msg.contains("Suggestions"));
    }

    #[test]
    fn test_size_error_display_and_source_are_preserved() {
        let cases = vec![
            (
                SizeError::TotalExceedsLimit {
                    actual: 2 * 1024 * 1024 * 1024,
                    limit: 1024 * 1024 * 1024,
                },
                "Total size (2.0 GB) exceeds GitHub Pages limit (1.0 GB)\n\n\
                 Suggestions:\n\
                 • Use --since \"90 days ago\" for recent conversations only\n\
                 • Use --agents <name> to limit to specific agents\n\
                 • Use --workspaces <path> to limit projects",
            ),
            (
                SizeError::FileExceedsLimit {
                    path: "site/archive.bin".to_string(),
                    actual: 150 * 1024 * 1024,
                    limit: 100 * 1024 * 1024,
                },
                "File site/archive.bin (150.0 MB) exceeds limit (100.0 MB)",
            ),
        ];

        for (error, expected_display) in cases {
            assert_eq!(error.to_string(), expected_display);
            assert!(std::error::Error::source(&error).is_none());
        }
    }

    #[test]
    fn test_bundle_verifier() {
        use tempfile::TempDir;

        let temp = TempDir::new().unwrap();

        std::fs::write(temp.path().join("small.txt"), vec![0u8; 1000]).unwrap();
        std::fs::write(temp.path().join("medium.txt"), vec![0u8; 10000]).unwrap();

        // Both files are far below every threshold, so no warnings are expected.
        let warnings = BundleVerifier::verify(temp.path()).unwrap();
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_chunk_count_ceiling_division() {
        let estimate = SizeEstimate::from_plaintext_size(1000, 1, 10).unwrap();
        assert_eq!(estimate.chunk_count, 1, "Small data should be 1 chunk");

        let one_chunk_plaintext = (DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(one_chunk_plaintext, 10, 100).unwrap();
        assert_eq!(
            estimate.chunk_count, 1,
            "Exactly one chunk's worth should be 1 chunk, not 2"
        );

        // Roughly 450 KB of additional compressed data spills into a second chunk.
        let over_one_chunk = one_chunk_plaintext + 1000000;
        let estimate = SizeEstimate::from_plaintext_size(over_one_chunk, 10, 100).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Slightly over one chunk's worth should round up to 2 chunks"
        );

        let two_chunks_plaintext = (2.0 * DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(two_chunks_plaintext, 100, 1000).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Exactly two chunks' worth should be 2 chunks, not 3"
        );
    }

    #[test]
    fn test_from_plaintext_size_handles_extremely_large_inputs() {
        let estimate = SizeEstimate::from_plaintext_size(u64::MAX, 1, 1).unwrap();
        assert_eq!(estimate.chunk_count, u32::MAX);
        assert!(estimate.total_site_bytes >= estimate.compressed_bytes);
    }

    #[test]
    #[cfg(unix)]
    fn test_visit_files_skips_symlink_paths() {
        use std::collections::HashSet;
        use std::os::unix::fs::symlink;
        use tempfile::TempDir;

        let src = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        std::fs::write(src.path().join("root.txt"), "root").unwrap();
        std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
        std::fs::create_dir_all(outside.path().join("nested")).unwrap();
        std::fs::write(outside.path().join("nested/hidden.txt"), "hidden").unwrap();

        // One symlink to a file and one to a directory, both pointing outside `src`.
        symlink(
            outside.path().join("secret.txt"),
            src.path().join("linked-file.txt"),
        )
        .unwrap();
        symlink(outside.path().join("nested"), src.path().join("linked-dir")).unwrap();

        let mut visited = HashSet::new();
        visit_files(src.path(), &mut |path, _size| {
            visited.insert(
                path.strip_prefix(src.path())
                    .unwrap()
                    .to_string_lossy()
                    .to_string(),
            );
            Ok(())
        })
        .unwrap();

        assert!(visited.contains("root.txt"));
        assert!(!visited.contains("linked-file.txt"));
        assert!(!visited.iter().any(|p| p.starts_with("linked-dir/")));
    }
}