use anyhow::{Context, Result, bail};
use frankensqlite::Row;
use frankensqlite::compat::{ConnectionExt, ParamValue, RowExt};
use serde::{Deserialize, Serialize};
use std::path::Path;
pub const MAX_SITE_SIZE_BYTES: u64 = 1024 * 1024 * 1024;
pub const SITE_SIZE_WARNING_BYTES: u64 = 900 * 1024 * 1024;
pub const MAX_FILE_SIZE_BYTES: u64 = 100 * 1024 * 1024;
pub const FILE_SIZE_WARNING_BYTES: u64 = 50 * 1024 * 1024;
pub const DEFAULT_CHUNK_SIZE: u64 = 8 * 1024 * 1024;
pub const AEAD_TAG_OVERHEAD: u64 = 16;
pub const STATIC_ASSETS_SIZE: u64 = 2 * 1024 * 1024;
pub const COMPRESSION_RATIO: f64 = 0.45;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SizeEstimate {
pub plaintext_bytes: u64,
pub compressed_bytes: u64,
pub encrypted_bytes: u64,
pub static_assets_bytes: u64,
pub total_site_bytes: u64,
pub chunk_count: u32,
pub conversation_count: u64,
pub message_count: u64,
}
impl SizeEstimate {
pub fn from_database<P: AsRef<Path>>(
db_path: P,
agents: Option<&[String]>,
since_ts: Option<i64>,
until_ts: Option<i64>,
) -> Result<Self> {
let conn = super::open_existing_sqlite_db(db_path.as_ref())
.context("Failed to open database for size estimation")?;
conn.execute("PRAGMA busy_timeout = 5000;")?;
let mut conditions = Vec::new();
let mut param_values: Vec<ParamValue> = Vec::new();
if let Some(agents) = agents {
if agents.is_empty() {
conditions.push("1=0".to_string());
} else {
let placeholders: Vec<_> = agents.iter().map(|_| "?").collect();
conditions.push(format!(
"c.agent_id IN (SELECT a.id FROM agents a WHERE a.slug IN ({}))",
placeholders.join(", ")
));
for agent in agents {
param_values.push(ParamValue::from(agent.as_str()));
}
}
}
if let Some(since) = since_ts {
conditions.push("c.started_at >= ?".to_string());
param_values.push(ParamValue::from(since));
}
if let Some(until) = until_ts {
conditions.push("c.started_at <= ?".to_string());
param_values.push(ParamValue::from(until));
}
let where_clause = if conditions.is_empty() {
String::new()
} else {
format!(" WHERE {}", conditions.join(" AND "))
};
let params_slice = ¶m_values;
let conv_sql = format!("SELECT COUNT(*) FROM conversations c{}", where_clause);
let conversation_count: u64 = conn
.query_row_map(&conv_sql, params_slice, |row: &Row| {
row.get_typed::<Option<i64>>(0)
.map(|opt| opt.unwrap_or(0).max(0) as u64)
})
.with_context(|| {
format!("Failed to count conversations for size estimate: {conv_sql}")
})?;
let msg_sql = format!(
"SELECT COUNT(*), SUM(LENGTH(m.content))
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
{}",
where_clause
);
let (message_count, plaintext_bytes): (u64, u64) = conn
.query_row_map(&msg_sql, params_slice, |row: &Row| {
let raw_message_count = row.get_typed::<i64>(0).unwrap_or(0);
let raw_plaintext_bytes = row.get_typed::<Option<i64>>(1)?.unwrap_or(0);
Ok((
raw_message_count.max(0) as u64,
raw_plaintext_bytes.max(0) as u64,
))
})
.with_context(|| format!("Failed to estimate message payload size: {msg_sql}"))?;
Self::from_plaintext_size(plaintext_bytes, conversation_count, message_count)
}
pub fn from_plaintext_size(
plaintext_bytes: u64,
conversation_count: u64,
message_count: u64,
) -> Result<Self> {
let compressed_bytes = (plaintext_bytes as f64 * COMPRESSION_RATIO) as u64;
let chunk_count_u64 = compressed_bytes.div_ceil(DEFAULT_CHUNK_SIZE).max(1);
let chunk_count = u32::try_from(chunk_count_u64).unwrap_or(u32::MAX);
let aead_overhead = u64::from(chunk_count)
.checked_mul(AEAD_TAG_OVERHEAD)
.ok_or_else(|| anyhow::anyhow!("AEAD overhead overflow"))?;
let encrypted_bytes = compressed_bytes
.checked_add(aead_overhead)
.ok_or_else(|| anyhow::anyhow!("Encrypted size overflow"))?;
let total_site_bytes = encrypted_bytes
.checked_add(STATIC_ASSETS_SIZE)
.ok_or_else(|| anyhow::anyhow!("Total site size overflow"))?;
Ok(Self {
plaintext_bytes,
compressed_bytes,
encrypted_bytes,
static_assets_bytes: STATIC_ASSETS_SIZE,
total_site_bytes,
chunk_count,
conversation_count,
message_count,
})
}
pub fn check_limits(&self) -> SizeLimitResult {
if self.total_site_bytes > MAX_SITE_SIZE_BYTES {
return SizeLimitResult::ExceedsLimit(SizeError::TotalExceedsLimit {
actual: self.total_site_bytes,
limit: MAX_SITE_SIZE_BYTES,
});
}
if self.total_site_bytes > SITE_SIZE_WARNING_BYTES {
return SizeLimitResult::Warning(SizeWarning::ApproachingLimit {
actual: self.total_site_bytes,
limit: MAX_SITE_SIZE_BYTES,
percentage: (self.total_site_bytes as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0)
as u8,
});
}
SizeLimitResult::Ok
}
pub fn format_display(&self) -> String {
format!(
"Estimated bundle size: {}\n\
• Payload: {} ({} chunks × {} max)\n\
• Static assets: {}\n\
• Compression ratio: ~{:.0}%\n\
• Conversations: {}\n\
• Messages: {}",
format_bytes(self.total_site_bytes),
format_bytes(self.encrypted_bytes),
self.chunk_count,
format_bytes(DEFAULT_CHUNK_SIZE),
format_bytes(self.static_assets_bytes),
COMPRESSION_RATIO * 100.0,
self.conversation_count,
self.message_count,
)
}
}
#[derive(Debug, Clone)]
pub enum SizeLimitResult {
Ok,
Warning(SizeWarning),
ExceedsLimit(SizeError),
}
impl SizeLimitResult {
pub fn is_ok(&self) -> bool {
matches!(self, SizeLimitResult::Ok)
}
pub fn is_warning(&self) -> bool {
matches!(self, SizeLimitResult::Warning(_))
}
pub fn is_error(&self) -> bool {
matches!(self, SizeLimitResult::ExceedsLimit(_))
}
}
#[derive(Debug, Clone, thiserror::Error)]
pub enum SizeError {
#[error(
"Total size ({}) exceeds GitHub Pages limit ({})\n\n\
Suggestions:\n\
• Use --since \"90 days ago\" for recent conversations only\n\
• Use --agents <name> to limit to specific agents\n\
• Use --workspaces <path> to limit projects",
format_bytes(*actual),
format_bytes(*limit)
)]
TotalExceedsLimit { actual: u64, limit: u64 },
#[error("File {path} ({}) exceeds limit ({})", format_bytes(*actual), format_bytes(*limit))]
FileExceedsLimit {
path: String,
actual: u64,
limit: u64,
},
}
#[derive(Debug, Clone)]
pub enum SizeWarning {
ApproachingLimit {
actual: u64,
limit: u64,
percentage: u8,
},
LargeFile { path: String, size: u64 },
}
impl std::fmt::Display for SizeWarning {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SizeWarning::ApproachingLimit {
actual,
limit,
percentage,
} => {
write!(
f,
"Estimated size {} is {}% of GitHub Pages limit ({})",
format_bytes(*actual),
percentage,
format_bytes(*limit)
)
}
SizeWarning::LargeFile { path, size } => {
write!(f, "Large file: {} ({})", path, format_bytes(*size))
}
}
}
}
pub struct BundleVerifier;
impl BundleVerifier {
pub fn verify<P: AsRef<Path>>(site_dir: P) -> Result<Vec<SizeWarning>> {
let site_dir = site_dir.as_ref();
let mut warnings = Vec::new();
let mut total_size = 0u64;
visit_files(site_dir, &mut |path, size| {
total_size += size;
if size > MAX_FILE_SIZE_BYTES {
bail!(
"File {} ({}) exceeds maximum file size ({}). Chunking may have failed.",
path.display(),
format_bytes(size),
format_bytes(MAX_FILE_SIZE_BYTES)
);
}
if size > FILE_SIZE_WARNING_BYTES {
let rel_path = path
.strip_prefix(site_dir)
.unwrap_or(path)
.to_string_lossy()
.to_string();
warnings.push(SizeWarning::LargeFile {
path: rel_path,
size,
});
}
Ok(())
})?;
if total_size > MAX_SITE_SIZE_BYTES {
bail!(
"Total bundle size ({}) exceeds GitHub Pages limit ({})",
format_bytes(total_size),
format_bytes(MAX_SITE_SIZE_BYTES)
);
}
if total_size > SITE_SIZE_WARNING_BYTES {
warnings.push(SizeWarning::ApproachingLimit {
actual: total_size,
limit: MAX_SITE_SIZE_BYTES,
percentage: (total_size as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0) as u8,
});
}
Ok(warnings)
}
}
fn visit_files<F>(dir: &Path, f: &mut F) -> Result<()>
where
F: FnMut(&Path, u64) -> Result<()>,
{
for entry in std::fs::read_dir(dir)? {
let entry = entry?;
let path = entry.path();
let metadata = std::fs::symlink_metadata(&path)?;
let file_type = metadata.file_type();
if file_type.is_symlink() {
continue;
}
if file_type.is_dir() {
visit_files(&path, f)?;
} else if file_type.is_file() {
f(&path, metadata.len())?;
}
}
Ok(())
}
fn format_bytes(bytes: u64) -> String {
const KB: u64 = 1024;
const MB: u64 = 1024 * KB;
const GB: u64 = 1024 * MB;
if bytes >= GB {
format!("{:.1} GB", bytes as f64 / GB as f64)
} else if bytes >= MB {
format!("{:.1} MB", bytes as f64 / MB as f64)
} else if bytes >= KB {
format!("{:.1} KB", bytes as f64 / KB as f64)
} else {
format!("{} bytes", bytes)
}
}
#[cfg(test)]
mod tests {
use super::*;
use frankensqlite::Connection;
#[test]
fn test_size_estimate_from_plaintext() {
let estimate = SizeEstimate::from_plaintext_size(
10 * 1024 * 1024, 100,
5000,
)
.unwrap();
assert!(estimate.compressed_bytes < estimate.plaintext_bytes);
assert_eq!(estimate.conversation_count, 100);
assert_eq!(estimate.message_count, 5000);
assert!(estimate.chunk_count >= 1);
}
#[test]
fn test_size_estimate_empty() {
let estimate = SizeEstimate::from_plaintext_size(0, 0, 0).unwrap();
assert_eq!(estimate.plaintext_bytes, 0);
assert_eq!(estimate.chunk_count, 1); assert_eq!(estimate.static_assets_bytes, STATIC_ASSETS_SIZE);
}
#[test]
fn test_size_limit_ok() {
let estimate = SizeEstimate::from_plaintext_size(
100 * 1024 * 1024, 100,
5000,
)
.unwrap();
let result = estimate.check_limits();
assert!(result.is_ok());
}
#[test]
fn test_size_limit_warning() {
let estimate = SizeEstimate::from_plaintext_size(
2000 * 1024 * 1024, 1000,
50000,
)
.unwrap();
let result = estimate.check_limits();
assert!(result.is_warning() || result.is_error());
}
#[test]
fn test_size_limit_exceeded() {
let estimate = SizeEstimate::from_plaintext_size(
3000 * 1024 * 1024, 5000,
250000,
)
.unwrap();
let result = estimate.check_limits();
assert!(result.is_error());
}
#[test]
fn test_format_bytes() {
assert_eq!(format_bytes(500), "500 bytes");
assert_eq!(format_bytes(1024), "1.0 KB");
assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
assert_eq!(format_bytes(1536 * 1024), "1.5 MB");
}
#[test]
fn test_format_display() {
let estimate = SizeEstimate::from_plaintext_size(10 * 1024 * 1024, 50, 2500).unwrap();
let display = estimate.format_display();
assert!(display.contains("Estimated bundle size"));
assert!(display.contains("Conversations: 50"));
assert!(display.contains("Messages: 2500"));
}
#[test]
fn test_from_database_filters_agents_through_agents_table() -> Result<()> {
let temp = tempfile::TempDir::new()?;
let db_path = temp.path().join("cass.db");
let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
conn.execute_batch(
"CREATE TABLE agents (
id INTEGER PRIMARY KEY,
slug TEXT NOT NULL
);
CREATE TABLE conversations (
id INTEGER PRIMARY KEY,
agent_id INTEGER NOT NULL,
started_at INTEGER
);
CREATE TABLE messages (
id INTEGER PRIMARY KEY,
conversation_id INTEGER NOT NULL,
content TEXT NOT NULL
);
INSERT INTO agents (id, slug) VALUES (1, 'claude'), (2, 'codex');
INSERT INTO conversations (id, agent_id, started_at)
VALUES (10, 1, 1000), (20, 2, 2000);
INSERT INTO messages (id, conversation_id, content)
VALUES (100, 10, 'hello'), (200, 20, 'rust code');",
)?;
let all = SizeEstimate::from_database(&db_path, None, None, None)?;
assert_eq!(all.conversation_count, 2);
assert_eq!(all.message_count, 2);
assert_eq!(all.plaintext_bytes, 14);
let codex =
SizeEstimate::from_database(&db_path, Some(&["codex".to_string()]), None, None)?;
assert_eq!(codex.conversation_count, 1);
assert_eq!(codex.message_count, 1);
assert_eq!(codex.plaintext_bytes, 9);
let empty_agent_filter = SizeEstimate::from_database(&db_path, Some(&[]), None, None)?;
assert_eq!(empty_agent_filter.conversation_count, 0);
assert_eq!(empty_agent_filter.message_count, 0);
assert_eq!(empty_agent_filter.plaintext_bytes, 0);
let recent = SizeEstimate::from_database(&db_path, None, Some(1500), None)?;
assert_eq!(recent.conversation_count, 1);
assert_eq!(recent.message_count, 1);
assert_eq!(recent.plaintext_bytes, 9);
Ok(())
}
#[test]
fn test_from_database_allows_read_only_source_db() -> Result<()> {
let temp = tempfile::TempDir::new()?;
let db_path = temp.path().join("cass-read-only.db");
let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
conn.execute_batch(
"CREATE TABLE agents (
id INTEGER PRIMARY KEY,
slug TEXT NOT NULL
);
CREATE TABLE conversations (
id INTEGER PRIMARY KEY,
agent_id INTEGER NOT NULL,
started_at INTEGER
);
CREATE TABLE messages (
id INTEGER PRIMARY KEY,
conversation_id INTEGER NOT NULL,
content TEXT NOT NULL
);
INSERT INTO agents (id, slug) VALUES (1, 'claude');
INSERT INTO conversations (id, agent_id, started_at) VALUES (10, 1, 1000);
INSERT INTO messages (id, conversation_id, content) VALUES (100, 10, 'readonly');",
)?;
drop(conn);
let original_permissions = std::fs::metadata(&db_path)?.permissions();
let mut read_only_permissions = original_permissions.clone();
read_only_permissions.set_readonly(true);
std::fs::set_permissions(&db_path, read_only_permissions)?;
let estimate = SizeEstimate::from_database(&db_path, None, None, None);
std::fs::set_permissions(&db_path, original_permissions)?;
let estimate = estimate?;
assert_eq!(estimate.conversation_count, 1);
assert_eq!(estimate.message_count, 1);
assert_eq!(estimate.plaintext_bytes, 8);
Ok(())
}
#[test]
fn test_size_error_display() {
let err = SizeError::TotalExceedsLimit {
actual: 2 * 1024 * 1024 * 1024,
limit: 1024 * 1024 * 1024,
};
let msg = err.to_string();
assert!(msg.contains("2.0 GB"));
assert!(msg.contains("1.0 GB"));
assert!(msg.contains("Suggestions"));
}
#[test]
fn test_size_error_display_and_source_are_preserved() {
let cases = vec![
(
SizeError::TotalExceedsLimit {
actual: 2 * 1024 * 1024 * 1024,
limit: 1024 * 1024 * 1024,
},
"Total size (2.0 GB) exceeds GitHub Pages limit (1.0 GB)\n\n\
Suggestions:\n\
• Use --since \"90 days ago\" for recent conversations only\n\
• Use --agents <name> to limit to specific agents\n\
• Use --workspaces <path> to limit projects",
),
(
SizeError::FileExceedsLimit {
path: "site/archive.bin".to_string(),
actual: 150 * 1024 * 1024,
limit: 100 * 1024 * 1024,
},
"File site/archive.bin (150.0 MB) exceeds limit (100.0 MB)",
),
];
for (error, expected_display) in cases {
assert_eq!(error.to_string(), expected_display);
assert!(std::error::Error::source(&error).is_none());
}
}
#[test]
fn test_bundle_verifier() {
use tempfile::TempDir;
let temp = TempDir::new().unwrap();
std::fs::write(temp.path().join("small.txt"), vec![0u8; 1000]).unwrap();
std::fs::write(temp.path().join("medium.txt"), vec![0u8; 10000]).unwrap();
let warnings = BundleVerifier::verify(temp.path()).unwrap();
assert!(warnings.is_empty()); }
#[test]
fn test_chunk_count_ceiling_division() {
let estimate = SizeEstimate::from_plaintext_size(1000, 1, 10).unwrap();
assert_eq!(estimate.chunk_count, 1, "Small data should be 1 chunk");
let one_chunk_plaintext = (DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
let estimate = SizeEstimate::from_plaintext_size(one_chunk_plaintext, 10, 100).unwrap();
assert_eq!(
estimate.chunk_count, 1,
"Exactly one chunk's worth should be 1 chunk, not 2"
);
let over_one_chunk = one_chunk_plaintext + 1000000; let estimate = SizeEstimate::from_plaintext_size(over_one_chunk, 10, 100).unwrap();
assert!(
estimate.chunk_count >= 1,
"Over one chunk should be at least 1 chunk"
);
let two_chunks_plaintext = (2.0 * DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
let estimate = SizeEstimate::from_plaintext_size(two_chunks_plaintext, 100, 1000).unwrap();
assert_eq!(
estimate.chunk_count, 2,
"Exactly two chunks' worth should be 2 chunks, not 3"
);
}
#[test]
fn test_from_plaintext_size_handles_extremely_large_inputs() {
let estimate = SizeEstimate::from_plaintext_size(u64::MAX, 1, 1).unwrap();
assert_eq!(estimate.chunk_count, u32::MAX);
assert!(estimate.total_site_bytes >= estimate.compressed_bytes);
}
#[test]
#[cfg(unix)]
fn test_visit_files_skips_symlink_paths() {
use std::collections::HashSet;
use std::os::unix::fs::symlink;
use tempfile::TempDir;
let src = TempDir::new().unwrap();
let outside = TempDir::new().unwrap();
std::fs::write(src.path().join("root.txt"), "root").unwrap();
std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
std::fs::create_dir_all(outside.path().join("nested")).unwrap();
std::fs::write(outside.path().join("nested/hidden.txt"), "hidden").unwrap();
symlink(
outside.path().join("secret.txt"),
src.path().join("linked-file.txt"),
)
.unwrap();
symlink(outside.path().join("nested"), src.path().join("linked-dir")).unwrap();
let mut visited = HashSet::new();
visit_files(src.path(), &mut |path, _size| {
visited.insert(
path.strip_prefix(src.path())
.unwrap()
.to_string_lossy()
.to_string(),
);
Ok(())
})
.unwrap();
assert!(visited.contains("root.txt"));
assert!(!visited.contains("linked-file.txt"));
assert!(!visited.iter().any(|p| p.starts_with("linked-dir/")));
}
}