Skip to main content

coding_agent_search/pages/
size.rs

1//! Bundle size estimation and limits enforcement.
2//!
3//! Provides pre-export size estimation to warn users before they spend time
4//! exporting/encrypting data that would exceed GitHub Pages limits.
5
6use anyhow::{Context, Result, bail};
7use frankensqlite::Row;
8use frankensqlite::compat::{ConnectionExt, ParamValue, RowExt};
9use serde::{Deserialize, Serialize};
10use std::path::Path;
11
/// One mebibyte in bytes; the unit the size limits below are expressed in.
const MIB: u64 = 1024 * 1024;

/// Maximum total site size for GitHub Pages (1 GiB).
pub const MAX_SITE_SIZE_BYTES: u64 = 1024 * MIB;

/// Total site size above which an "approaching limit" warning is raised (900 MiB).
pub const SITE_SIZE_WARNING_BYTES: u64 = 900 * MIB;

/// Maximum size of a single file for GitHub (100 MiB).
pub const MAX_FILE_SIZE_BYTES: u64 = 100 * MIB;

/// Single-file size above which a "large file" warning is raised (50 MiB).
pub const FILE_SIZE_WARNING_BYTES: u64 = 50 * MIB;

/// Default chunk size for the encrypted payload (8 MiB).
pub const DEFAULT_CHUNK_SIZE: u64 = 8 * MIB;

/// AEAD authentication tag overhead added to every chunk (16 bytes).
pub const AEAD_TAG_OVERHEAD: u64 = 16;

/// Estimated size of the static assets (HTML, JS, CSS, WASM vendor), about 2 MiB.
pub const STATIC_ASSETS_SIZE: u64 = 2 * MIB;

/// Typical compressed/plaintext size ratio assumed for text content (deflate).
pub const COMPRESSION_RATIO: f64 = 0.45;
35
36/// Pre-export size estimate
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct SizeEstimate {
39    /// Raw content size in bytes (uncompressed)
40    pub plaintext_bytes: u64,
41    /// Estimated compressed size in bytes
42    pub compressed_bytes: u64,
43    /// Estimated encrypted size in bytes (with AEAD overhead)
44    pub encrypted_bytes: u64,
45    /// Static assets size (HTML, JS, CSS, WASM)
46    pub static_assets_bytes: u64,
47    /// Total estimated site size
48    pub total_site_bytes: u64,
49    /// Estimated number of payload chunks
50    pub chunk_count: u32,
51    /// Number of conversations included
52    pub conversation_count: u64,
53    /// Number of messages included
54    pub message_count: u64,
55}
56
57impl SizeEstimate {
58    /// Create a size estimate from a database and filter
59    pub fn from_database<P: AsRef<Path>>(
60        db_path: P,
61        agents: Option<&[String]>,
62        since_ts: Option<i64>,
63        until_ts: Option<i64>,
64    ) -> Result<Self> {
65        let conn = super::open_existing_sqlite_db(db_path.as_ref())
66            .context("Failed to open database for size estimation")?;
67
68        conn.execute("PRAGMA busy_timeout = 5000;")?;
69
70        // Build filter conditions
71        let mut conditions = Vec::new();
72        let mut param_values: Vec<ParamValue> = Vec::new();
73
74        if let Some(agents) = agents {
75            if agents.is_empty() {
76                conditions.push("1=0".to_string());
77            } else {
78                let placeholders: Vec<_> = agents.iter().map(|_| "?").collect();
79                // Keep this as a non-correlated subquery. The file-backed
80                // read-only pages path still resolves this shape reliably,
81                // while the equivalent correlated EXISTS can under-match.
82                conditions.push(format!(
83                    "c.agent_id IN (SELECT a.id FROM agents a WHERE a.slug IN ({}))",
84                    placeholders.join(", ")
85                ));
86                for agent in agents {
87                    param_values.push(ParamValue::from(agent.as_str()));
88                }
89            }
90        }
91
92        if let Some(since) = since_ts {
93            conditions.push("c.started_at >= ?".to_string());
94            param_values.push(ParamValue::from(since));
95        }
96
97        if let Some(until) = until_ts {
98            conditions.push("c.started_at <= ?".to_string());
99            param_values.push(ParamValue::from(until));
100        }
101
102        let where_clause = if conditions.is_empty() {
103            String::new()
104        } else {
105            format!(" WHERE {}", conditions.join(" AND "))
106        };
107
108        let params_slice = &param_values;
109
110        // Query conversation count. We read the COUNT(*) cell as Option<i64>
111        // because frankensqlite can return NULL from `COUNT(*)` when the
112        // WHERE clause excludes all rows (e.g., the empty-agent-filter "1=0"
113        // path) — standard SQLite returns 0, fsqlite currently returns NULL.
114        // The Option<i64>.unwrap_or(0) shim absorbs the difference without a
115        // type-mismatch panic.
116        let conv_sql = format!("SELECT COUNT(*) FROM conversations c{}", where_clause);
117        let conversation_count: u64 = conn
118            .query_row_map(&conv_sql, params_slice, |row: &Row| {
119                row.get_typed::<Option<i64>>(0)
120                    .map(|opt| opt.unwrap_or(0).max(0) as u64)
121            })
122            .with_context(|| {
123                format!("Failed to count conversations for size estimate: {conv_sql}")
124            })?;
125
126        // Query message count and content size
127        let msg_sql = format!(
128            "SELECT COUNT(*), SUM(LENGTH(m.content))
129             FROM messages m
130             JOIN conversations c ON m.conversation_id = c.id
131             {}",
132            where_clause
133        );
134        let (message_count, plaintext_bytes): (u64, u64) = conn
135            .query_row_map(&msg_sql, params_slice, |row: &Row| {
136                let raw_message_count = row.get_typed::<i64>(0).unwrap_or(0);
137                let raw_plaintext_bytes = row.get_typed::<Option<i64>>(1)?.unwrap_or(0);
138                Ok((
139                    raw_message_count.max(0) as u64,
140                    raw_plaintext_bytes.max(0) as u64,
141                ))
142            })
143            .with_context(|| format!("Failed to estimate message payload size: {msg_sql}"))?;
144
145        Self::from_plaintext_size(plaintext_bytes, conversation_count, message_count)
146    }
147
148    /// Create estimate from known plaintext size
149    pub fn from_plaintext_size(
150        plaintext_bytes: u64,
151        conversation_count: u64,
152        message_count: u64,
153    ) -> Result<Self> {
154        // Estimate compression
155        let compressed_bytes = (plaintext_bytes as f64 * COMPRESSION_RATIO) as u64;
156
157        // Calculate chunk count (minimum of 1 chunk even for empty content)
158        let chunk_count_u64 = compressed_bytes.div_ceil(DEFAULT_CHUNK_SIZE).max(1);
159        let chunk_count = u32::try_from(chunk_count_u64).unwrap_or(u32::MAX);
160
161        // Add AEAD overhead
162        let aead_overhead = u64::from(chunk_count)
163            .checked_mul(AEAD_TAG_OVERHEAD)
164            .ok_or_else(|| anyhow::anyhow!("AEAD overhead overflow"))?;
165        let encrypted_bytes = compressed_bytes
166            .checked_add(aead_overhead)
167            .ok_or_else(|| anyhow::anyhow!("Encrypted size overflow"))?;
168
169        // Total with static assets
170        let total_site_bytes = encrypted_bytes
171            .checked_add(STATIC_ASSETS_SIZE)
172            .ok_or_else(|| anyhow::anyhow!("Total site size overflow"))?;
173
174        Ok(Self {
175            plaintext_bytes,
176            compressed_bytes,
177            encrypted_bytes,
178            static_assets_bytes: STATIC_ASSETS_SIZE,
179            total_site_bytes,
180            chunk_count,
181            conversation_count,
182            message_count,
183        })
184    }
185
186    /// Check if the estimate exceeds hard limits
187    pub fn check_limits(&self) -> SizeLimitResult {
188        if self.total_site_bytes > MAX_SITE_SIZE_BYTES {
189            return SizeLimitResult::ExceedsLimit(SizeError::TotalExceedsLimit {
190                actual: self.total_site_bytes,
191                limit: MAX_SITE_SIZE_BYTES,
192            });
193        }
194
195        if self.total_site_bytes > SITE_SIZE_WARNING_BYTES {
196            return SizeLimitResult::Warning(SizeWarning::ApproachingLimit {
197                actual: self.total_site_bytes,
198                limit: MAX_SITE_SIZE_BYTES,
199                percentage: (self.total_site_bytes as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0)
200                    as u8,
201            });
202        }
203
204        SizeLimitResult::Ok
205    }
206
207    /// Format the estimate for display
208    pub fn format_display(&self) -> String {
209        format!(
210            "Estimated bundle size: {}\n\
211             • Payload: {} ({} chunks × {} max)\n\
212             • Static assets: {}\n\
213             • Compression ratio: ~{:.0}%\n\
214             • Conversations: {}\n\
215             • Messages: {}",
216            format_bytes(self.total_site_bytes),
217            format_bytes(self.encrypted_bytes),
218            self.chunk_count,
219            format_bytes(DEFAULT_CHUNK_SIZE),
220            format_bytes(self.static_assets_bytes),
221            COMPRESSION_RATIO * 100.0,
222            self.conversation_count,
223            self.message_count,
224        )
225    }
226}
227
228/// Result of checking size limits
229#[derive(Debug, Clone)]
230pub enum SizeLimitResult {
231    /// Size is within limits
232    Ok,
233    /// Size is approaching limits (warning)
234    Warning(SizeWarning),
235    /// Size exceeds limits (error)
236    ExceedsLimit(SizeError),
237}
238
239impl SizeLimitResult {
240    /// Returns true if export should proceed
241    pub fn is_ok(&self) -> bool {
242        matches!(self, SizeLimitResult::Ok)
243    }
244
245    /// Returns true if there's a warning but export can proceed
246    pub fn is_warning(&self) -> bool {
247        matches!(self, SizeLimitResult::Warning(_))
248    }
249
250    /// Returns true if export should be blocked
251    pub fn is_error(&self) -> bool {
252        matches!(self, SizeLimitResult::ExceedsLimit(_))
253    }
254}
255
256/// Size-related errors
257#[derive(Debug, Clone, thiserror::Error)]
258pub enum SizeError {
259    /// Total site size exceeds GitHub Pages limit
260    #[error(
261        "Total size ({}) exceeds GitHub Pages limit ({})\n\n\
262         Suggestions:\n\
263         • Use --since \"90 days ago\" for recent conversations only\n\
264         • Use --agents <name> to limit to specific agents\n\
265         • Use --workspaces <path> to limit projects",
266        format_bytes(*actual),
267        format_bytes(*limit)
268    )]
269    TotalExceedsLimit { actual: u64, limit: u64 },
270    /// Individual file exceeds limit
271    #[error("File {path} ({}) exceeds limit ({})", format_bytes(*actual), format_bytes(*limit))]
272    FileExceedsLimit {
273        path: String,
274        actual: u64,
275        limit: u64,
276    },
277}
278
279/// Size-related warnings
280#[derive(Debug, Clone)]
281pub enum SizeWarning {
282    /// Total size is approaching limit
283    ApproachingLimit {
284        actual: u64,
285        limit: u64,
286        percentage: u8,
287    },
288    /// Individual file is large
289    LargeFile { path: String, size: u64 },
290}
291
292impl std::fmt::Display for SizeWarning {
293    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
294        match self {
295            SizeWarning::ApproachingLimit {
296                actual,
297                limit,
298                percentage,
299            } => {
300                write!(
301                    f,
302                    "Estimated size {} is {}% of GitHub Pages limit ({})",
303                    format_bytes(*actual),
304                    percentage,
305                    format_bytes(*limit)
306                )
307            }
308            SizeWarning::LargeFile { path, size } => {
309                write!(f, "Large file: {} ({})", path, format_bytes(*size))
310            }
311        }
312    }
313}
314
315/// Post-export bundle verification
316pub struct BundleVerifier;
317
318impl BundleVerifier {
319    /// Verify a bundle directory meets all size constraints
320    pub fn verify<P: AsRef<Path>>(site_dir: P) -> Result<Vec<SizeWarning>> {
321        let site_dir = site_dir.as_ref();
322        let mut warnings = Vec::new();
323        let mut total_size = 0u64;
324
325        visit_files(site_dir, &mut |path, size| {
326            total_size += size;
327
328            if size > MAX_FILE_SIZE_BYTES {
329                bail!(
330                    "File {} ({}) exceeds maximum file size ({}). Chunking may have failed.",
331                    path.display(),
332                    format_bytes(size),
333                    format_bytes(MAX_FILE_SIZE_BYTES)
334                );
335            }
336
337            if size > FILE_SIZE_WARNING_BYTES {
338                let rel_path = path
339                    .strip_prefix(site_dir)
340                    .unwrap_or(path)
341                    .to_string_lossy()
342                    .to_string();
343                warnings.push(SizeWarning::LargeFile {
344                    path: rel_path,
345                    size,
346                });
347            }
348
349            Ok(())
350        })?;
351
352        if total_size > MAX_SITE_SIZE_BYTES {
353            bail!(
354                "Total bundle size ({}) exceeds GitHub Pages limit ({})",
355                format_bytes(total_size),
356                format_bytes(MAX_SITE_SIZE_BYTES)
357            );
358        }
359
360        if total_size > SITE_SIZE_WARNING_BYTES {
361            warnings.push(SizeWarning::ApproachingLimit {
362                actual: total_size,
363                limit: MAX_SITE_SIZE_BYTES,
364                percentage: (total_size as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0) as u8,
365            });
366        }
367
368        Ok(warnings)
369    }
370}
371
372/// Visit all files in a directory recursively
373fn visit_files<F>(dir: &Path, f: &mut F) -> Result<()>
374where
375    F: FnMut(&Path, u64) -> Result<()>,
376{
377    for entry in std::fs::read_dir(dir)? {
378        let entry = entry?;
379        let path = entry.path();
380        let metadata = std::fs::symlink_metadata(&path)?;
381        let file_type = metadata.file_type();
382
383        if file_type.is_symlink() {
384            continue;
385        }
386
387        if file_type.is_dir() {
388            visit_files(&path, f)?;
389        } else if file_type.is_file() {
390            f(&path, metadata.len())?;
391        }
392    }
393    Ok(())
394}
395
/// Format a byte count as a human-readable string.
///
/// Values below 1024 are printed as whole bytes (`"500 bytes"`). Larger
/// values are printed with one decimal in the largest fitting unit among
/// `KB`, `MB` and `GB`, where each unit is 1024 times the previous one.
///
/// The unit is chosen after rounding: a value that would print as
/// `1024.0` in one unit is promoted to the next one, so 1 048 575 bytes is
/// shown as `"1.0 MB"` rather than `"1024.0 KB"`. `GB` is the largest unit
/// and is never promoted.
fn format_bytes(bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = 1024 * KB;
    const GB: u64 = 1024 * MB;

    if bytes < KB {
        return format!("{} bytes", bytes);
    }

    // Pick the first unit in which the value, rounded to one decimal, stays
    // below 1024.0 (i.e. below 10240 tenths); fall back to the largest unit.
    let (unit, label) = [(KB, "KB"), (MB, "MB")]
        .iter()
        .copied()
        .find(|&(unit, _)| (bytes as f64 / unit as f64 * 10.0).round() < 10_240.0)
        .unwrap_or((GB, "GB"));

    format!("{:.1} {}", bytes as f64 / unit as f64, label)
}
412
/// Unit tests for size estimation, limit checks, formatting and bundle verification.
#[cfg(test)]
mod tests {
    use super::*;
    use frankensqlite::Connection;

    #[test]
    fn test_size_estimate_from_plaintext() {
        let estimate = SizeEstimate::from_plaintext_size(
            10 * 1024 * 1024, // 10 MB plaintext
            100,
            5000,
        )
        .unwrap();

        // Should compress to ~4.5 MB
        assert!(estimate.compressed_bytes < estimate.plaintext_bytes);
        assert_eq!(estimate.conversation_count, 100);
        assert_eq!(estimate.message_count, 5000);
        assert!(estimate.chunk_count >= 1);
    }

    #[test]
    fn test_size_estimate_empty() {
        let estimate = SizeEstimate::from_plaintext_size(0, 0, 0).unwrap();
        assert_eq!(estimate.plaintext_bytes, 0);
        assert_eq!(estimate.chunk_count, 1); // At least 1 chunk
        assert_eq!(estimate.static_assets_bytes, STATIC_ASSETS_SIZE);
    }

    #[test]
    fn test_size_limit_ok() {
        let estimate = SizeEstimate::from_plaintext_size(
            100 * 1024 * 1024, // 100 MB - should be fine
            100,
            5000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(result.is_ok());
    }

    #[test]
    fn test_size_limit_warning() {
        // 2000 MB plaintext compresses to ~900 MB; with the per-chunk AEAD
        // overhead and the static assets the total lands above the 900 MB
        // warning threshold but well below the 1 GB hard limit.
        let estimate = SizeEstimate::from_plaintext_size(
            2000 * 1024 * 1024, // 2000 MB plaintext -> ~900 MB compressed
            1000,
            50000,
        )
        .unwrap();

        assert!(estimate.total_site_bytes > SITE_SIZE_WARNING_BYTES);
        assert!(estimate.total_site_bytes <= MAX_SITE_SIZE_BYTES);

        let result = estimate.check_limits();
        assert!(
            result.is_warning(),
            "a total between the warning threshold and the hard limit must be a warning"
        );
    }

    #[test]
    fn test_size_limit_exceeded() {
        let estimate = SizeEstimate::from_plaintext_size(
            3000 * 1024 * 1024, // 3 GB plaintext -> ~1.35 GB compressed
            5000,
            250000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(result.is_error());
    }

    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(500), "500 bytes");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
        assert_eq!(format_bytes(1536 * 1024), "1.5 MB");
    }

    #[test]
    fn test_format_display() {
        let estimate = SizeEstimate::from_plaintext_size(10 * 1024 * 1024, 50, 2500).unwrap();

        let display = estimate.format_display();
        assert!(display.contains("Estimated bundle size"));
        assert!(display.contains("Conversations: 50"));
        assert!(display.contains("Messages: 2500"));
    }

    #[test]
    fn test_from_database_filters_agents_through_agents_table() -> Result<()> {
        let temp = tempfile::TempDir::new()?;
        let db_path = temp.path().join("cass.db");
        let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
        conn.execute_batch(
            "CREATE TABLE agents (
                id INTEGER PRIMARY KEY,
                slug TEXT NOT NULL
            );
            CREATE TABLE conversations (
                id INTEGER PRIMARY KEY,
                agent_id INTEGER NOT NULL,
                started_at INTEGER
            );
            CREATE TABLE messages (
                id INTEGER PRIMARY KEY,
                conversation_id INTEGER NOT NULL,
                content TEXT NOT NULL
            );
            INSERT INTO agents (id, slug) VALUES (1, 'claude'), (2, 'codex');
            INSERT INTO conversations (id, agent_id, started_at)
                VALUES (10, 1, 1000), (20, 2, 2000);
            INSERT INTO messages (id, conversation_id, content)
                VALUES (100, 10, 'hello'), (200, 20, 'rust code');",
        )?;

        let all = SizeEstimate::from_database(&db_path, None, None, None)?;
        assert_eq!(all.conversation_count, 2);
        assert_eq!(all.message_count, 2);
        assert_eq!(all.plaintext_bytes, 14);

        let codex =
            SizeEstimate::from_database(&db_path, Some(&["codex".to_string()]), None, None)?;
        assert_eq!(codex.conversation_count, 1);
        assert_eq!(codex.message_count, 1);
        assert_eq!(codex.plaintext_bytes, 9);

        let empty_agent_filter = SizeEstimate::from_database(&db_path, Some(&[]), None, None)?;
        assert_eq!(empty_agent_filter.conversation_count, 0);
        assert_eq!(empty_agent_filter.message_count, 0);
        assert_eq!(empty_agent_filter.plaintext_bytes, 0);

        let recent = SizeEstimate::from_database(&db_path, None, Some(1500), None)?;
        assert_eq!(recent.conversation_count, 1);
        assert_eq!(recent.message_count, 1);
        assert_eq!(recent.plaintext_bytes, 9);

        Ok(())
    }

    #[test]
    fn test_from_database_allows_read_only_source_db() -> Result<()> {
        let temp = tempfile::TempDir::new()?;
        let db_path = temp.path().join("cass-read-only.db");
        let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
        conn.execute_batch(
            "CREATE TABLE agents (
                id INTEGER PRIMARY KEY,
                slug TEXT NOT NULL
            );
            CREATE TABLE conversations (
                id INTEGER PRIMARY KEY,
                agent_id INTEGER NOT NULL,
                started_at INTEGER
            );
            CREATE TABLE messages (
                id INTEGER PRIMARY KEY,
                conversation_id INTEGER NOT NULL,
                content TEXT NOT NULL
            );
            INSERT INTO agents (id, slug) VALUES (1, 'claude');
            INSERT INTO conversations (id, agent_id, started_at) VALUES (10, 1, 1000);
            INSERT INTO messages (id, conversation_id, content) VALUES (100, 10, 'readonly');",
        )?;
        drop(conn);

        let original_permissions = std::fs::metadata(&db_path)?.permissions();
        let mut read_only_permissions = original_permissions.clone();
        read_only_permissions.set_readonly(true);
        std::fs::set_permissions(&db_path, read_only_permissions)?;

        let estimate = SizeEstimate::from_database(&db_path, None, None, None);

        // Restore the permissions before propagating any failure so the
        // temporary directory can always be cleaned up.
        std::fs::set_permissions(&db_path, original_permissions)?;
        let estimate = estimate?;

        assert_eq!(estimate.conversation_count, 1);
        assert_eq!(estimate.message_count, 1);
        assert_eq!(estimate.plaintext_bytes, 8);
        Ok(())
    }

    #[test]
    fn test_size_error_display() {
        let err = SizeError::TotalExceedsLimit {
            actual: 2 * 1024 * 1024 * 1024,
            limit: 1024 * 1024 * 1024,
        };

        let msg = err.to_string();
        assert!(msg.contains("2.0 GB"));
        assert!(msg.contains("1.0 GB"));
        assert!(msg.contains("Suggestions"));
    }

    #[test]
    fn test_size_error_display_and_source_are_preserved() {
        let cases = vec![
            (
                SizeError::TotalExceedsLimit {
                    actual: 2 * 1024 * 1024 * 1024,
                    limit: 1024 * 1024 * 1024,
                },
                "Total size (2.0 GB) exceeds GitHub Pages limit (1.0 GB)\n\n\
                 Suggestions:\n\
                 • Use --since \"90 days ago\" for recent conversations only\n\
                 • Use --agents <name> to limit to specific agents\n\
                 • Use --workspaces <path> to limit projects",
            ),
            (
                SizeError::FileExceedsLimit {
                    path: "site/archive.bin".to_string(),
                    actual: 150 * 1024 * 1024,
                    limit: 100 * 1024 * 1024,
                },
                "File site/archive.bin (150.0 MB) exceeds limit (100.0 MB)",
            ),
        ];

        for (error, expected_display) in cases {
            assert_eq!(error.to_string(), expected_display);
            assert!(std::error::Error::source(&error).is_none());
        }
    }

    #[test]
    fn test_bundle_verifier() {
        use tempfile::TempDir;

        let temp = TempDir::new().unwrap();

        // Create some small files
        std::fs::write(temp.path().join("small.txt"), vec![0u8; 1000]).unwrap();
        std::fs::write(temp.path().join("medium.txt"), vec![0u8; 10000]).unwrap();

        let warnings = BundleVerifier::verify(temp.path()).unwrap();
        assert!(warnings.is_empty()); // No warnings for small files
    }

    #[test]
    fn test_chunk_count_ceiling_division() {
        // Test that chunk count uses proper ceiling division
        // COMPRESSION_RATIO = 0.45, DEFAULT_CHUNK_SIZE = 8 MB

        // Test 1: Very small data -> 1 chunk
        let estimate = SizeEstimate::from_plaintext_size(1000, 1, 10).unwrap();
        assert_eq!(estimate.chunk_count, 1, "Small data should be 1 chunk");

        // Test 2: Data that compresses to (at most) exactly 1 chunk size
        // 8 MB / 0.45 = 17.78 MB plaintext -> 8 MB compressed -> 1 chunk
        let one_chunk_plaintext = (DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(one_chunk_plaintext, 10, 100).unwrap();
        // Both casts truncate, so compressed_bytes never exceeds DEFAULT_CHUNK_SIZE here.
        // The important thing is it should NOT be 2 chunks when it's exactly 1 chunk of data
        assert_eq!(
            estimate.chunk_count, 1,
            "Exactly one chunk's worth should be 1 chunk, not 2"
        );

        // Test 3: Data just over 1 chunk -> 2 chunks
        // ~1 MB of extra plaintext adds ~450 KB of compressed data, which
        // does not fit in the first chunk any more.
        let over_one_chunk = one_chunk_plaintext + 1000000;
        let estimate = SizeEstimate::from_plaintext_size(over_one_chunk, 10, 100).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Just over one chunk's worth should spill into a second chunk"
        );

        // Test 4: Large data that compresses to ~2 chunks
        let two_chunks_plaintext = (2.0 * DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(two_chunks_plaintext, 100, 1000).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Exactly two chunks' worth should be 2 chunks, not 3"
        );
    }

    #[test]
    fn test_from_plaintext_size_handles_extremely_large_inputs() {
        let estimate = SizeEstimate::from_plaintext_size(u64::MAX, 1, 1).unwrap();
        assert_eq!(estimate.chunk_count, u32::MAX);
        assert!(estimate.total_site_bytes >= estimate.compressed_bytes);
    }

    #[test]
    #[cfg(unix)]
    fn test_visit_files_skips_symlink_paths() {
        use std::collections::HashSet;
        use std::os::unix::fs::symlink;
        use tempfile::TempDir;

        let src = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        std::fs::write(src.path().join("root.txt"), "root").unwrap();
        std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
        std::fs::create_dir_all(outside.path().join("nested")).unwrap();
        std::fs::write(outside.path().join("nested/hidden.txt"), "hidden").unwrap();

        symlink(
            outside.path().join("secret.txt"),
            src.path().join("linked-file.txt"),
        )
        .unwrap();
        symlink(outside.path().join("nested"), src.path().join("linked-dir")).unwrap();

        let mut visited = HashSet::new();
        visit_files(src.path(), &mut |path, _size| {
            visited.insert(
                path.strip_prefix(src.path())
                    .unwrap()
                    .to_string_lossy()
                    .to_string(),
            );
            Ok(())
        })
        .unwrap();

        assert!(visited.contains("root.txt"));
        assert!(!visited.contains("linked-file.txt"));
        assert!(!visited.iter().any(|p| p.starts_with("linked-dir/")));
    }
}