Skip to main content

coding_agent_search/pages/
size.rs

1//! Bundle size estimation and limits enforcement.
2//!
3//! Provides pre-export size estimation to warn users before they spend time
4//! exporting/encrypting data that would exceed GitHub Pages limits.
5
6use anyhow::{Context, Result, bail};
7use frankensqlite::Row;
8use frankensqlite::compat::{ConnectionExt, ParamValue, RowExt};
9use serde::{Deserialize, Serialize};
10use std::path::Path;
11
/// Hard ceiling on the total size of a GitHub Pages site: 1 GiB.
pub const MAX_SITE_SIZE_BYTES: u64 = 1 << 30;

/// Total site size (900 MiB) above which a "approaching the limit" warning is raised.
pub const SITE_SIZE_WARNING_BYTES: u64 = 900 << 20;

/// Largest single file GitHub accepts: 100 MiB.
pub const MAX_FILE_SIZE_BYTES: u64 = 100 << 20;

/// Single-file size (50 MiB) above which a warning is raised.
pub const FILE_SIZE_WARNING_BYTES: u64 = 50 << 20;

/// Default size of one encrypted payload chunk: 8 MiB.
pub const DEFAULT_CHUNK_SIZE: u64 = 8 << 20;

/// Bytes added to every chunk by the AEAD authentication tag.
pub const AEAD_TAG_OVERHEAD: u64 = 16;

/// Rough size of the bundled static assets (HTML, JS, CSS, WASM vendor): about 2 MiB.
pub const STATIC_ASSETS_SIZE: u64 = 2 << 20;

/// Assumed compressed-to-plaintext size ratio for text content (deflate).
pub const COMPRESSION_RATIO: f64 = 0.45;
35
36/// Pre-export size estimate
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct SizeEstimate {
39    /// Raw content size in bytes (uncompressed)
40    pub plaintext_bytes: u64,
41    /// Estimated compressed size in bytes
42    pub compressed_bytes: u64,
43    /// Estimated encrypted size in bytes (with AEAD overhead)
44    pub encrypted_bytes: u64,
45    /// Static assets size (HTML, JS, CSS, WASM)
46    pub static_assets_bytes: u64,
47    /// Total estimated site size
48    pub total_site_bytes: u64,
49    /// Estimated number of payload chunks
50    pub chunk_count: u32,
51    /// Number of conversations included
52    pub conversation_count: u64,
53    /// Number of messages included
54    pub message_count: u64,
55}
56
57impl SizeEstimate {
58    /// Create a size estimate from a database and filter
59    pub fn from_database<P: AsRef<Path>>(
60        db_path: P,
61        agents: Option<&[String]>,
62        since_ts: Option<i64>,
63        until_ts: Option<i64>,
64    ) -> Result<Self> {
65        let conn = super::open_existing_sqlite_db(db_path.as_ref())
66            .context("Failed to open database for size estimation")?;
67
68        conn.execute("PRAGMA busy_timeout = 5000;")?;
69
70        // Build filter conditions
71        let mut conditions = Vec::new();
72        let mut param_values: Vec<ParamValue> = Vec::new();
73
74        if let Some(agents) = agents {
75            if agents.is_empty() {
76                conditions.push("1=0".to_string());
77            } else {
78                let placeholders: Vec<_> = agents.iter().map(|_| "?").collect();
79                // Use a non-correlated subquery via IN instead of a correlated
80                // EXISTS. frankensqlite's current planner doesn't match the
81                // correlated `a.id = c.agent_id` join condition consistently;
82                // the non-correlated form is what the rest of the codebase
83                // uses (see src/pages/summary.rs::get_date_histogram).
84                conditions.push(format!(
85                    "c.agent_id IN (SELECT a.id FROM agents a WHERE a.slug IN ({}))",
86                    placeholders.join(", ")
87                ));
88                for agent in agents {
89                    param_values.push(ParamValue::from(agent.as_str()));
90                }
91            }
92        }
93
94        if let Some(since) = since_ts {
95            conditions.push("c.started_at >= ?".to_string());
96            param_values.push(ParamValue::from(since));
97        }
98
99        if let Some(until) = until_ts {
100            conditions.push("c.started_at <= ?".to_string());
101            param_values.push(ParamValue::from(until));
102        }
103
104        let where_clause = if conditions.is_empty() {
105            String::new()
106        } else {
107            format!(" WHERE {}", conditions.join(" AND "))
108        };
109
110        let params_slice = &param_values;
111
112        // Query conversation count. We read the COUNT(*) cell as Option<i64>
113        // because frankensqlite can return NULL from `COUNT(*)` when the
114        // WHERE clause excludes all rows (e.g., the empty-agent-filter "1=0"
115        // path) — standard SQLite returns 0, fsqlite currently returns NULL.
116        // The Option<i64>.unwrap_or(0) shim absorbs the difference without a
117        // type-mismatch panic.
118        let conv_sql = format!("SELECT COUNT(*) FROM conversations c{}", where_clause);
119        let conversation_count: u64 = conn
120            .query_row_map(&conv_sql, params_slice, |row: &Row| {
121                row.get_typed::<Option<i64>>(0)
122                    .map(|opt| opt.unwrap_or(0).max(0) as u64)
123            })
124            .with_context(|| {
125                format!("Failed to count conversations for size estimate: {conv_sql}")
126            })?;
127
128        // Query message count and content size
129        let msg_sql = format!(
130            "SELECT COUNT(*), SUM(LENGTH(m.content))
131             FROM messages m
132             JOIN conversations c ON m.conversation_id = c.id
133             {}",
134            where_clause
135        );
136        let (message_count, plaintext_bytes): (u64, u64) = conn
137            .query_row_map(&msg_sql, params_slice, |row: &Row| {
138                let raw_message_count = row.get_typed::<i64>(0).unwrap_or(0);
139                let raw_plaintext_bytes = row.get_typed::<Option<i64>>(1)?.unwrap_or(0);
140                Ok((
141                    raw_message_count.max(0) as u64,
142                    raw_plaintext_bytes.max(0) as u64,
143                ))
144            })
145            .with_context(|| format!("Failed to estimate message payload size: {msg_sql}"))?;
146
147        Self::from_plaintext_size(plaintext_bytes, conversation_count, message_count)
148    }
149
150    /// Create estimate from known plaintext size
151    pub fn from_plaintext_size(
152        plaintext_bytes: u64,
153        conversation_count: u64,
154        message_count: u64,
155    ) -> Result<Self> {
156        // Estimate compression
157        let compressed_bytes = (plaintext_bytes as f64 * COMPRESSION_RATIO) as u64;
158
159        // Calculate chunk count (minimum of 1 chunk even for empty content)
160        let chunk_count_u64 = compressed_bytes.div_ceil(DEFAULT_CHUNK_SIZE).max(1);
161        let chunk_count = u32::try_from(chunk_count_u64).unwrap_or(u32::MAX);
162
163        // Add AEAD overhead
164        let aead_overhead = u64::from(chunk_count)
165            .checked_mul(AEAD_TAG_OVERHEAD)
166            .ok_or_else(|| anyhow::anyhow!("AEAD overhead overflow"))?;
167        let encrypted_bytes = compressed_bytes
168            .checked_add(aead_overhead)
169            .ok_or_else(|| anyhow::anyhow!("Encrypted size overflow"))?;
170
171        // Total with static assets
172        let total_site_bytes = encrypted_bytes
173            .checked_add(STATIC_ASSETS_SIZE)
174            .ok_or_else(|| anyhow::anyhow!("Total site size overflow"))?;
175
176        Ok(Self {
177            plaintext_bytes,
178            compressed_bytes,
179            encrypted_bytes,
180            static_assets_bytes: STATIC_ASSETS_SIZE,
181            total_site_bytes,
182            chunk_count,
183            conversation_count,
184            message_count,
185        })
186    }
187
188    /// Check if the estimate exceeds hard limits
189    pub fn check_limits(&self) -> SizeLimitResult {
190        if self.total_site_bytes > MAX_SITE_SIZE_BYTES {
191            return SizeLimitResult::ExceedsLimit(SizeError::TotalExceedsLimit {
192                actual: self.total_site_bytes,
193                limit: MAX_SITE_SIZE_BYTES,
194            });
195        }
196
197        if self.total_site_bytes > SITE_SIZE_WARNING_BYTES {
198            return SizeLimitResult::Warning(SizeWarning::ApproachingLimit {
199                actual: self.total_site_bytes,
200                limit: MAX_SITE_SIZE_BYTES,
201                percentage: (self.total_site_bytes as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0)
202                    as u8,
203            });
204        }
205
206        SizeLimitResult::Ok
207    }
208
209    /// Format the estimate for display
210    pub fn format_display(&self) -> String {
211        format!(
212            "Estimated bundle size: {}\n\
213             • Payload: {} ({} chunks × {} max)\n\
214             • Static assets: {}\n\
215             • Compression ratio: ~{:.0}%\n\
216             • Conversations: {}\n\
217             • Messages: {}",
218            format_bytes(self.total_site_bytes),
219            format_bytes(self.encrypted_bytes),
220            self.chunk_count,
221            format_bytes(DEFAULT_CHUNK_SIZE),
222            format_bytes(self.static_assets_bytes),
223            COMPRESSION_RATIO * 100.0,
224            self.conversation_count,
225            self.message_count,
226        )
227    }
228}
229
230/// Result of checking size limits
231#[derive(Debug, Clone)]
232pub enum SizeLimitResult {
233    /// Size is within limits
234    Ok,
235    /// Size is approaching limits (warning)
236    Warning(SizeWarning),
237    /// Size exceeds limits (error)
238    ExceedsLimit(SizeError),
239}
240
241impl SizeLimitResult {
242    /// Returns true if export should proceed
243    pub fn is_ok(&self) -> bool {
244        matches!(self, SizeLimitResult::Ok)
245    }
246
247    /// Returns true if there's a warning but export can proceed
248    pub fn is_warning(&self) -> bool {
249        matches!(self, SizeLimitResult::Warning(_))
250    }
251
252    /// Returns true if export should be blocked
253    pub fn is_error(&self) -> bool {
254        matches!(self, SizeLimitResult::ExceedsLimit(_))
255    }
256}
257
258/// Size-related errors
259#[derive(Debug, Clone, thiserror::Error)]
260pub enum SizeError {
261    /// Total site size exceeds GitHub Pages limit
262    #[error(
263        "Total size ({}) exceeds GitHub Pages limit ({})\n\n\
264         Suggestions:\n\
265         • Use --since \"90 days ago\" for recent conversations only\n\
266         • Use --agents <name> to limit to specific agents\n\
267         • Use --workspaces <path> to limit projects",
268        format_bytes(*actual),
269        format_bytes(*limit)
270    )]
271    TotalExceedsLimit { actual: u64, limit: u64 },
272    /// Individual file exceeds limit
273    #[error("File {path} ({}) exceeds limit ({})", format_bytes(*actual), format_bytes(*limit))]
274    FileExceedsLimit {
275        path: String,
276        actual: u64,
277        limit: u64,
278    },
279}
280
/// Non-fatal size conditions worth reporting to the user.
#[derive(Clone, Debug)]
pub enum SizeWarning {
    /// The total size is getting close to the limit.
    ApproachingLimit {
        /// Observed or estimated total size in bytes.
        actual: u64,
        /// The limit being approached, in bytes.
        limit: u64,
        /// `actual` expressed as a whole-number percentage of `limit`.
        percentage: u8,
    },
    /// A single file is large.
    LargeFile {
        /// Path of the file as it should be shown to the user.
        path: String,
        /// Size of the file in bytes.
        size: u64,
    },
}
293
294impl std::fmt::Display for SizeWarning {
295    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
296        match self {
297            SizeWarning::ApproachingLimit {
298                actual,
299                limit,
300                percentage,
301            } => {
302                write!(
303                    f,
304                    "Estimated size {} is {}% of GitHub Pages limit ({})",
305                    format_bytes(*actual),
306                    percentage,
307                    format_bytes(*limit)
308                )
309            }
310            SizeWarning::LargeFile { path, size } => {
311                write!(f, "Large file: {} ({})", path, format_bytes(*size))
312            }
313        }
314    }
315}
316
317/// Post-export bundle verification
318pub struct BundleVerifier;
319
320impl BundleVerifier {
321    /// Verify a bundle directory meets all size constraints
322    pub fn verify<P: AsRef<Path>>(site_dir: P) -> Result<Vec<SizeWarning>> {
323        let site_dir = site_dir.as_ref();
324        let mut warnings = Vec::new();
325        let mut total_size = 0u64;
326
327        visit_files(site_dir, &mut |path, size| {
328            total_size += size;
329
330            if size > MAX_FILE_SIZE_BYTES {
331                bail!(
332                    "File {} ({}) exceeds maximum file size ({}). Chunking may have failed.",
333                    path.display(),
334                    format_bytes(size),
335                    format_bytes(MAX_FILE_SIZE_BYTES)
336                );
337            }
338
339            if size > FILE_SIZE_WARNING_BYTES {
340                let rel_path = path
341                    .strip_prefix(site_dir)
342                    .unwrap_or(path)
343                    .to_string_lossy()
344                    .to_string();
345                warnings.push(SizeWarning::LargeFile {
346                    path: rel_path,
347                    size,
348                });
349            }
350
351            Ok(())
352        })?;
353
354        if total_size > MAX_SITE_SIZE_BYTES {
355            bail!(
356                "Total bundle size ({}) exceeds GitHub Pages limit ({})",
357                format_bytes(total_size),
358                format_bytes(MAX_SITE_SIZE_BYTES)
359            );
360        }
361
362        if total_size > SITE_SIZE_WARNING_BYTES {
363            warnings.push(SizeWarning::ApproachingLimit {
364                actual: total_size,
365                limit: MAX_SITE_SIZE_BYTES,
366                percentage: (total_size as f64 / MAX_SITE_SIZE_BYTES as f64 * 100.0) as u8,
367            });
368        }
369
370        Ok(warnings)
371    }
372}
373
/// Recursively walk `dir`, invoking `f` with the path and length of every
/// regular file found.
///
/// Symlinks are skipped rather than followed, and entries that are neither
/// directories nor regular files are ignored. The walk stops at the first
/// I/O error or the first error returned by `f`.
fn visit_files<F>(dir: &Path, f: &mut F) -> Result<()>
where
    F: FnMut(&Path, u64) -> Result<()>,
{
    for dir_entry in std::fs::read_dir(dir)? {
        let entry_path = dir_entry?.path();
        // `symlink_metadata` describes the link itself, never its target.
        let meta = std::fs::symlink_metadata(&entry_path)?;
        let kind = meta.file_type();

        if kind.is_symlink() {
            // Never descend into or measure anything reached through a link.
        } else if kind.is_dir() {
            visit_files(&entry_path, f)?;
        } else if kind.is_file() {
            f(&entry_path, meta.len())?;
        }
    }
    Ok(())
}
397
/// Render a byte count as a short human-readable string.
///
/// Uses 1024-based units: values of at least 1024 bytes are shown with one
/// decimal place in the largest of KB / MB / GB that does not exceed the
/// value; smaller values are shown as a whole number of bytes.
fn format_bytes(bytes: u64) -> String {
    // Largest unit first so the first match is the one to display.
    const UNITS: [(u64, &str); 3] = [(1 << 30, "GB"), (1 << 20, "MB"), (1 << 10, "KB")];

    UNITS
        .iter()
        .find(|&&(scale, _)| bytes >= scale)
        .map_or_else(
            || format!("{} bytes", bytes),
            |&(scale, suffix)| format!("{:.1} {}", bytes as f64 / scale as f64, suffix),
        )
}
414
/// Unit tests for size estimation, limit checks, formatting, and bundle
/// verification.
#[cfg(test)]
mod tests {
    use super::*;
    use frankensqlite::Connection;

    #[test]
    fn test_size_estimate_from_plaintext() {
        let estimate = SizeEstimate::from_plaintext_size(
            10 * 1024 * 1024, // 10 MB plaintext
            100,
            5000,
        )
        .unwrap();

        // Should compress to ~4.5 MB
        assert!(estimate.compressed_bytes < estimate.plaintext_bytes);
        assert_eq!(estimate.conversation_count, 100);
        assert_eq!(estimate.message_count, 5000);
        assert!(estimate.chunk_count >= 1);
    }

    #[test]
    fn test_size_estimate_empty() {
        let estimate = SizeEstimate::from_plaintext_size(0, 0, 0).unwrap();
        assert_eq!(estimate.plaintext_bytes, 0);
        assert_eq!(estimate.chunk_count, 1); // At least 1 chunk
        assert_eq!(estimate.static_assets_bytes, STATIC_ASSETS_SIZE);
    }

    #[test]
    fn test_size_limit_ok() {
        let estimate = SizeEstimate::from_plaintext_size(
            100 * 1024 * 1024, // 100 MB - should be fine
            100,
            5000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(result.is_ok());
    }

    #[test]
    fn test_size_limit_warning() {
        // Need ~900 MB compressed to trigger warning
        // 900 MB / 0.45 compression = 2000 MB plaintext
        let estimate = SizeEstimate::from_plaintext_size(
            2000 * 1024 * 1024, // 2 GB plaintext -> ~900 MB compressed
            1000,
            50000,
        )
        .unwrap();

        // ~900 MB compressed plus 2 MB of static assets lands between the
        // warning threshold and the hard limit, so this must be a warning
        // specifically — accepting an error here would hide a regression.
        let result = estimate.check_limits();
        assert!(
            result.is_warning(),
            "expected a warning between the thresholds, got {result:?}"
        );
    }

    #[test]
    fn test_size_limit_exceeded() {
        let estimate = SizeEstimate::from_plaintext_size(
            3000 * 1024 * 1024, // 3 GB plaintext -> ~1.35 GB compressed
            5000,
            250000,
        )
        .unwrap();

        let result = estimate.check_limits();
        assert!(result.is_error());
    }

    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(500), "500 bytes");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
        assert_eq!(format_bytes(1536 * 1024), "1.5 MB");
    }

    #[test]
    fn test_format_display() {
        let estimate = SizeEstimate::from_plaintext_size(10 * 1024 * 1024, 50, 2500).unwrap();

        let display = estimate.format_display();
        assert!(display.contains("Estimated bundle size"));
        assert!(display.contains("Conversations: 50"));
        assert!(display.contains("Messages: 2500"));
    }

    #[test]
    fn test_from_database_filters_agents_through_agents_table() -> Result<()> {
        let temp = tempfile::TempDir::new()?;
        let db_path = temp.path().join("cass.db");
        let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
        conn.execute_batch(
            "CREATE TABLE agents (
                id INTEGER PRIMARY KEY,
                slug TEXT NOT NULL
            );
            CREATE TABLE conversations (
                id INTEGER PRIMARY KEY,
                agent_id INTEGER NOT NULL,
                started_at INTEGER
            );
            CREATE TABLE messages (
                id INTEGER PRIMARY KEY,
                conversation_id INTEGER NOT NULL,
                content TEXT NOT NULL
            );
            INSERT INTO agents (id, slug) VALUES (1, 'claude'), (2, 'codex');
            INSERT INTO conversations (id, agent_id, started_at)
                VALUES (10, 1, 1000), (20, 2, 2000);
            INSERT INTO messages (id, conversation_id, content)
                VALUES (100, 10, 'hello'), (200, 20, 'rust code');",
        )?;

        let all = SizeEstimate::from_database(&db_path, None, None, None)?;
        assert_eq!(all.conversation_count, 2);
        assert_eq!(all.message_count, 2);
        assert_eq!(all.plaintext_bytes, 14);

        let codex =
            SizeEstimate::from_database(&db_path, Some(&["codex".to_string()]), None, None)?;
        assert_eq!(codex.conversation_count, 1);
        assert_eq!(codex.message_count, 1);
        assert_eq!(codex.plaintext_bytes, 9);

        let empty_agent_filter = SizeEstimate::from_database(&db_path, Some(&[]), None, None)?;
        assert_eq!(empty_agent_filter.conversation_count, 0);
        assert_eq!(empty_agent_filter.message_count, 0);
        assert_eq!(empty_agent_filter.plaintext_bytes, 0);

        let recent = SizeEstimate::from_database(&db_path, None, Some(1500), None)?;
        assert_eq!(recent.conversation_count, 1);
        assert_eq!(recent.message_count, 1);
        assert_eq!(recent.plaintext_bytes, 9);

        Ok(())
    }

    #[test]
    fn test_from_database_allows_read_only_source_db() -> Result<()> {
        let temp = tempfile::TempDir::new()?;
        let db_path = temp.path().join("cass-read-only.db");
        let conn = Connection::open(db_path.to_string_lossy().as_ref())?;
        conn.execute_batch(
            "CREATE TABLE agents (
                id INTEGER PRIMARY KEY,
                slug TEXT NOT NULL
            );
            CREATE TABLE conversations (
                id INTEGER PRIMARY KEY,
                agent_id INTEGER NOT NULL,
                started_at INTEGER
            );
            CREATE TABLE messages (
                id INTEGER PRIMARY KEY,
                conversation_id INTEGER NOT NULL,
                content TEXT NOT NULL
            );
            INSERT INTO agents (id, slug) VALUES (1, 'claude');
            INSERT INTO conversations (id, agent_id, started_at) VALUES (10, 1, 1000);
            INSERT INTO messages (id, conversation_id, content) VALUES (100, 10, 'readonly');",
        )?;
        drop(conn);

        let original_permissions = std::fs::metadata(&db_path)?.permissions();
        let mut read_only_permissions = original_permissions.clone();
        read_only_permissions.set_readonly(true);
        std::fs::set_permissions(&db_path, read_only_permissions)?;

        let estimate = SizeEstimate::from_database(&db_path, None, None, None);

        // Restore permissions before propagating any failure so the temp
        // directory can always be cleaned up.
        std::fs::set_permissions(&db_path, original_permissions)?;
        let estimate = estimate?;

        assert_eq!(estimate.conversation_count, 1);
        assert_eq!(estimate.message_count, 1);
        assert_eq!(estimate.plaintext_bytes, 8);
        Ok(())
    }

    #[test]
    fn test_size_error_display() {
        let err = SizeError::TotalExceedsLimit {
            actual: 2 * 1024 * 1024 * 1024,
            limit: 1024 * 1024 * 1024,
        };

        let msg = err.to_string();
        assert!(msg.contains("2.0 GB"));
        assert!(msg.contains("1.0 GB"));
        assert!(msg.contains("Suggestions"));
    }

    #[test]
    fn test_size_error_display_and_source_are_preserved() {
        let cases = vec![
            (
                SizeError::TotalExceedsLimit {
                    actual: 2 * 1024 * 1024 * 1024,
                    limit: 1024 * 1024 * 1024,
                },
                "Total size (2.0 GB) exceeds GitHub Pages limit (1.0 GB)\n\n\
                 Suggestions:\n\
                 • Use --since \"90 days ago\" for recent conversations only\n\
                 • Use --agents <name> to limit to specific agents\n\
                 • Use --workspaces <path> to limit projects",
            ),
            (
                SizeError::FileExceedsLimit {
                    path: "site/archive.bin".to_string(),
                    actual: 150 * 1024 * 1024,
                    limit: 100 * 1024 * 1024,
                },
                "File site/archive.bin (150.0 MB) exceeds limit (100.0 MB)",
            ),
        ];

        for (error, expected_display) in cases {
            assert_eq!(error.to_string(), expected_display);
            assert!(std::error::Error::source(&error).is_none());
        }
    }

    #[test]
    fn test_bundle_verifier() {
        use tempfile::TempDir;

        let temp = TempDir::new().unwrap();

        // Create some small files
        std::fs::write(temp.path().join("small.txt"), vec![0u8; 1000]).unwrap();
        std::fs::write(temp.path().join("medium.txt"), vec![0u8; 10000]).unwrap();

        let warnings = BundleVerifier::verify(temp.path()).unwrap();
        assert!(warnings.is_empty()); // No warnings for small files
    }

    #[test]
    fn test_chunk_count_ceiling_division() {
        // Test that chunk count uses proper ceiling division
        // COMPRESSION_RATIO = 0.45, DEFAULT_CHUNK_SIZE = 8 MB

        // Test 1: Very small data -> 1 chunk
        let estimate = SizeEstimate::from_plaintext_size(1000, 1, 10).unwrap();
        assert_eq!(estimate.chunk_count, 1, "Small data should be 1 chunk");

        // Test 2: Data that compresses to exactly 1 chunk size
        // 8 MB / 0.45 = 17.78 MB plaintext -> exactly 8 MB compressed -> 1 chunk
        // Use a value that when multiplied by 0.45 gives exactly DEFAULT_CHUNK_SIZE
        let one_chunk_plaintext = (DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(one_chunk_plaintext, 10, 100).unwrap();
        // Due to floating point, compressed_bytes should be very close to DEFAULT_CHUNK_SIZE
        // The important thing is it should NOT be 2 chunks when it's exactly 1 chunk of data
        assert_eq!(
            estimate.chunk_count, 1,
            "Exactly one chunk's worth should be 1 chunk, not 2"
        );

        // Test 3: Data just over 1 chunk -> 2 chunks
        // The extra ~1 MB of plaintext adds ~450 KB of compressed data, which
        // spills past the first chunk and must be rounded up to a second one.
        let over_one_chunk = one_chunk_plaintext + 1000000; // Add ~1 MB to plaintext
        let estimate = SizeEstimate::from_plaintext_size(over_one_chunk, 10, 100).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Just over one chunk's worth should round up to 2 chunks"
        );

        // Test 4: Large data that compresses to ~2 chunks
        let two_chunks_plaintext = (2.0 * DEFAULT_CHUNK_SIZE as f64 / COMPRESSION_RATIO) as u64;
        let estimate = SizeEstimate::from_plaintext_size(two_chunks_plaintext, 100, 1000).unwrap();
        assert_eq!(
            estimate.chunk_count, 2,
            "Exactly two chunks' worth should be 2 chunks, not 3"
        );
    }

    #[test]
    fn test_from_plaintext_size_handles_extremely_large_inputs() {
        let estimate = SizeEstimate::from_plaintext_size(u64::MAX, 1, 1).unwrap();
        assert_eq!(estimate.chunk_count, u32::MAX);
        assert!(estimate.total_site_bytes >= estimate.compressed_bytes);
    }

    #[test]
    #[cfg(unix)]
    fn test_visit_files_skips_symlink_paths() {
        use std::collections::HashSet;
        use std::os::unix::fs::symlink;
        use tempfile::TempDir;

        let src = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        std::fs::write(src.path().join("root.txt"), "root").unwrap();
        std::fs::write(outside.path().join("secret.txt"), "secret").unwrap();
        std::fs::create_dir_all(outside.path().join("nested")).unwrap();
        std::fs::write(outside.path().join("nested/hidden.txt"), "hidden").unwrap();

        symlink(
            outside.path().join("secret.txt"),
            src.path().join("linked-file.txt"),
        )
        .unwrap();
        symlink(outside.path().join("nested"), src.path().join("linked-dir")).unwrap();

        let mut visited = HashSet::new();
        visit_files(src.path(), &mut |path, _size| {
            visited.insert(
                path.strip_prefix(src.path())
                    .unwrap()
                    .to_string_lossy()
                    .to_string(),
            );
            Ok(())
        })
        .unwrap();

        assert!(visited.contains("root.txt"));
        assert!(!visited.contains("linked-file.txt"));
        assert!(!visited.iter().any(|p| p.starts_with("linked-dir/")));
    }
}