Skip to main content

coding_agent_search/pages/
verify.rs

1//! Verify command for CI pipelines.
2//!
3//! Provides `cass pages --verify <PATH>` to validate an existing export bundle for CI/CD.
4//! The verifier confirms correct structure, config schema, payload integrity, and
5//! the absence of secrets in site/.
6
7use anyhow::{Context, Result};
8use base64::prelude::*;
9use serde::{Deserialize, Serialize};
10use serde_json::{Map, Value};
11use sha2::{Digest, Sha256};
12use std::collections::HashSet;
13use std::fs::{self, File};
14use std::io::{BufReader, Read};
15use std::path::Path;
16
17use super::archive_config::{ArchiveConfig, UnencryptedConfig};
18use super::bundle::IntegrityManifest;
19use super::encrypt::{EncryptionConfig, SCHEMA_VERSION};
20use std::fmt;
21
/// Maximum chunk file size (GitHub Pages hard limit)
///
/// Applied by `check_size_limits` to every `*.bin` file in `payload/` and to
/// the single payload file of an unencrypted export.
const MAX_CHUNK_SIZE: u64 = 100 * 1024 * 1024; // 100 MiB (reported to users as "100MB")

/// Maximum chunk_size config value (32 MiB)
///
/// Applied by `validate_encrypted_config` to `payload.chunk_size`.
const MAX_CONFIG_CHUNK_SIZE: usize = 32 * 1024 * 1024;

/// Required files that must exist in site/
///
/// `check_required_files` additionally requires a `payload/` directory, which
/// is not listed here because it is not a file.
const REQUIRED_FILES: &[&str] = &[
    "index.html",
    "config.json",
    "sw.js",
    "viewer.js",
    "auth.js",
    "styles.css",
    "robots.txt",
    ".nojekyll",
];

/// Files that indicate secret leakage
const SECRET_FILES: &[&str] = &[
    "recovery-secret.txt",
    "qr-code.png",
    "qr-code.svg",
    "master-key.json",
];

/// Directories that should not exist in site/
const SECRET_DIRS: &[&str] = &["private"];

/// JSON keys in config.json that indicate plaintext secret leakage.
///
/// Each entry is a `(key, label)` pair.
/// NOTE(review): the label is presumably the human-readable name used in the
/// failure message; the consumer is outside this part of the file — confirm.
const FORBIDDEN_CONFIG_KEYS: &[(&str, &str)] = &[
    ("password", "password field"),
    ("secret", "secret field"),
    ("private_key", "private_key field"),
    ("master_key", "master_key field"),
    ("recovery_secret", "recovery_secret"),
];

/// Top-level keys allowed in config.json when it has no `encrypted` key
/// (the encrypted layout). Anything else is reported as an unknown field.
const ENCRYPTED_CONFIG_KEYS: &[&str] = &[
    "version",
    "export_id",
    "base_nonce",
    "compression",
    "kdf_defaults",
    "payload",
    "key_slots",
];
/// Top-level keys allowed in config.json when it has an `encrypted` key
/// (the unencrypted layout).
const UNENCRYPTED_CONFIG_KEYS: &[&str] = &["encrypted", "version", "payload", "warning"];
/// Keys allowed inside `payload` for the encrypted layout.
const ENCRYPTED_PAYLOAD_KEYS: &[&str] = &[
    "chunk_size",
    "chunk_count",
    "total_compressed_size",
    "total_plaintext_size",
    "files",
];
/// Keys allowed inside `payload` for the unencrypted layout.
const UNENCRYPTED_PAYLOAD_KEYS: &[&str] = &["path", "format", "size_bytes"];
/// Keys allowed inside `kdf_defaults` and inside each key slot's `argon2_params`.
const ARGON2_PARAM_KEYS: &[&str] = &["memory_kb", "iterations", "parallelism"];
/// Keys allowed inside each element of `key_slots`.
const KEY_SLOT_KEYS: &[&str] = &[
    "id",
    "slot_type",
    "kdf",
    "salt",
    "wrapped_dek",
    "nonce",
    "argon2_params",
];
88
/// Verification result for a single check
///
/// Built through `CheckResult::pass` or `CheckResult::fail`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckResult {
    /// Whether the check passed
    pub passed: bool,
    /// Details about the check: `None` if passed, the error message if failed.
    /// A `None` value is left out of the serialized output entirely.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<String>,
}
98
99impl CheckResult {
100    fn pass() -> Self {
101        Self {
102            passed: true,
103            details: None,
104        }
105    }
106
107    fn fail(details: impl Into<String>) -> Self {
108        Self {
109            passed: false,
110            details: Some(details.into()),
111        }
112    }
113}
114
/// Summary of all verification checks
///
/// One field per check run by `verify_bundle`; every check is always run, so
/// each field is populated even when an earlier check failed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerifyChecks {
    /// All required site files and the `payload/` directory are present.
    pub required_files: CheckResult,
    /// `config.json` exists, parses, has no unknown fields, and passes validation.
    pub config_schema: CheckResult,
    /// The payload files referenced by `config.json` exist as regular files.
    pub payload_manifest: CheckResult,
    /// No payload file exceeds the maximum chunk file size.
    pub size_limits: CheckResult,
    /// `integrity.json` exists and its entries verify; a missing manifest is a failure.
    pub integrity: CheckResult,
    /// No secret-bearing files or directories are present in the site directory.
    pub no_secrets_in_site: CheckResult,
}
125
126impl VerifyChecks {
127    /// Returns true if all checks passed
128    pub fn all_passed(&self) -> bool {
129        self.required_files.passed
130            && self.config_schema.passed
131            && self.payload_manifest.passed
132            && self.size_limits.passed
133            && self.integrity.passed
134            && self.no_secrets_in_site.passed
135    }
136}
137
/// Complete verification result
///
/// Returned by `verify_bundle` and serializable for machine-readable CI output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerifyResult {
    /// Overall status: "valid" when every check passed, otherwise "invalid"
    pub status: String,
    /// Individual check results
    pub checks: VerifyChecks,
    /// Warning messages (non-fatal issues); warnings do not affect `status`
    pub warnings: Vec<String>,
    /// Total site size in bytes
    pub site_size_bytes: u64,
}
150
151/// Verify a bundle export
152///
153/// # Arguments
154/// * `path` - Path to the export root (containing site/) or site/ directory itself
155/// * `verbose` - Whether to print detailed progress
156///
157/// # Returns
158/// `VerifyResult` with all check outcomes
159pub fn verify_bundle(path: &Path, verbose: bool) -> Result<VerifyResult> {
160    // Resolve to site/ directory
161    let site_dir = super::resolve_site_dir(path)?;
162
163    if verbose {
164        println!("Verifying bundle at: {}", site_dir.display());
165    }
166
167    let warnings = Vec::new();
168
169    // Check 1: Required files
170    if verbose {
171        println!("  Checking required files...");
172    }
173    let required_files = check_required_files(&site_dir);
174
175    // Check 2: Config schema (only if config.json exists)
176    if verbose {
177        println!("  Checking config.json schema...");
178    }
179    let config_schema = if site_dir.join("config.json").exists() {
180        check_config_schema(&site_dir)
181    } else {
182        CheckResult::fail("config.json not found")
183    };
184
185    // Check 3: Payload manifest
186    if verbose {
187        println!("  Checking payload manifest...");
188    }
189    let payload_manifest = check_payload_manifest(&site_dir);
190
191    // Check 4: Size limits
192    if verbose {
193        println!("  Checking size limits...");
194    }
195    let size_limits = check_size_limits(&site_dir);
196
197    // Check 5: Integrity (if integrity.json exists)
198    if verbose {
199        println!("  Checking integrity...");
200    }
201    let integrity = if site_dir.join("integrity.json").exists() {
202        check_integrity(&site_dir, verbose)
203    } else {
204        CheckResult::fail("integrity.json missing — bundle integrity cannot be verified")
205    };
206
207    // Check 6: No secrets in site/
208    if verbose {
209        println!("  Checking for secret leakage...");
210    }
211    let no_secrets_in_site = check_no_secrets(&site_dir);
212
213    // Calculate total site size
214    let site_size_bytes = calculate_dir_size(&site_dir)?;
215
216    let checks = VerifyChecks {
217        required_files,
218        config_schema,
219        payload_manifest,
220        size_limits,
221        integrity,
222        no_secrets_in_site,
223    };
224
225    let status = if checks.all_passed() {
226        "valid".to_string()
227    } else {
228        "invalid".to_string()
229    };
230
231    Ok(VerifyResult {
232        status,
233        checks,
234        warnings,
235        site_size_bytes,
236    })
237}
238
239/// Check that all required files exist
240fn check_required_files(site_dir: &Path) -> CheckResult {
241    let mut missing = Vec::new();
242    let mut invalid = Vec::new();
243
244    for file in REQUIRED_FILES {
245        let path = site_dir.join(file);
246        match fs::symlink_metadata(&path) {
247            Ok(metadata) => {
248                let file_type = metadata.file_type();
249                if file_type.is_file() {
250                    continue;
251                }
252                if file_type.is_symlink()
253                    && let Ok(target_meta) = fs::metadata(&path)
254                    && target_meta.file_type().is_file()
255                {
256                    continue;
257                }
258                invalid.push(format!("{file} (must be a regular file)"));
259            }
260            Err(_) => missing.push(*file),
261        }
262    }
263
264    // Also check payload/ directory exists
265    if !site_dir.join("payload").is_dir() {
266        missing.push("payload/");
267    }
268
269    if missing.is_empty() && invalid.is_empty() {
270        CheckResult::pass()
271    } else {
272        let mut parts = Vec::new();
273        if !missing.is_empty() {
274            parts.push(format!("Missing files: {}", missing.join(", ")));
275        }
276        if !invalid.is_empty() {
277            parts.push(format!("Invalid required files: {}", invalid.join(", ")));
278        }
279        CheckResult::fail(parts.join("; "))
280    }
281}
282
283/// Check config.json schema validity
284fn check_config_schema(site_dir: &Path) -> CheckResult {
285    let config_path = site_dir.join("config.json");
286
287    let content = match fs::read_to_string(&config_path).context("Failed to read config.json") {
288        Ok(content) => content,
289        Err(e) => return CheckResult::fail(format!("Failed to read config.json: {}", e)),
290    };
291
292    let config_json: Value =
293        match serde_json::from_str(&content).context("Failed to parse JSON syntax") {
294            Ok(json) => json,
295            Err(e) => return CheckResult::fail(format!("Failed to parse config.json: {}", e)),
296        };
297
298    let unknown_field_errors = find_unknown_config_fields(&config_json);
299    if !unknown_field_errors.is_empty() {
300        return CheckResult::fail(unknown_field_errors.join("; "));
301    }
302
303    let config: ArchiveConfig = match serde_json::from_value(config_json) {
304        Ok(c) => c,
305        Err(e) => return CheckResult::fail(format!("Failed to parse config.json: {}", e)),
306    };
307
308    let errors = match &config {
309        ArchiveConfig::Encrypted(enc) => validate_encrypted_config(enc),
310        ArchiveConfig::Unencrypted(unenc) => validate_unencrypted_config(unenc),
311    };
312
313    if errors.is_empty() {
314        CheckResult::pass()
315    } else {
316        CheckResult::fail(errors.join("; "))
317    }
318}
319
320fn find_unknown_config_fields(value: &Value) -> Vec<String> {
321    let mut errors = Vec::new();
322    let Some(root) = value.as_object() else {
323        return errors;
324    };
325
326    if root.contains_key("encrypted") {
327        collect_unknown_fields(root, UNENCRYPTED_CONFIG_KEYS, "", &mut errors);
328        if let Some(payload) = root.get("payload").and_then(Value::as_object) {
329            collect_unknown_fields(payload, UNENCRYPTED_PAYLOAD_KEYS, "payload", &mut errors);
330        }
331    } else {
332        collect_unknown_fields(root, ENCRYPTED_CONFIG_KEYS, "", &mut errors);
333        if let Some(payload) = root.get("payload").and_then(Value::as_object) {
334            collect_unknown_fields(payload, ENCRYPTED_PAYLOAD_KEYS, "payload", &mut errors);
335        }
336        if let Some(params) = root.get("kdf_defaults").and_then(Value::as_object) {
337            collect_unknown_fields(params, ARGON2_PARAM_KEYS, "kdf_defaults", &mut errors);
338        }
339        if let Some(slots) = root.get("key_slots").and_then(Value::as_array) {
340            for (idx, slot) in slots.iter().enumerate() {
341                if let Some(slot_obj) = slot.as_object() {
342                    let slot_path = format!("key_slots[{idx}]");
343                    collect_unknown_fields(slot_obj, KEY_SLOT_KEYS, &slot_path, &mut errors);
344                    if let Some(params) = slot_obj.get("argon2_params").and_then(Value::as_object) {
345                        collect_unknown_fields(
346                            params,
347                            ARGON2_PARAM_KEYS,
348                            &format!("{slot_path}.argon2_params"),
349                            &mut errors,
350                        );
351                    }
352                }
353            }
354        }
355    }
356
357    errors
358}
359
360fn collect_unknown_fields(
361    object: &Map<String, Value>,
362    allowed_keys: &[&str],
363    current_path: &str,
364    errors: &mut Vec<String>,
365) {
366    for key in object.keys() {
367        if !allowed_keys.contains(&key.as_str()) {
368            let path = if current_path.is_empty() {
369                key.clone()
370            } else {
371                format!("{current_path}.{key}")
372            };
373            errors.push(format!("config.json contains unknown field: {path}"));
374        }
375    }
376}
377
378fn validate_encrypted_config(config: &EncryptionConfig) -> Vec<String> {
379    let mut errors = Vec::new();
380
381    if config.version != SCHEMA_VERSION {
382        errors.push(format!(
383            "version must be {}; got {}. The current encrypted pages format supports only schema version {}.",
384            SCHEMA_VERSION, config.version, SCHEMA_VERSION
385        ));
386    }
387
388    // Validate export_id (base64, 16 bytes)
389    match BASE64_STANDARD.decode(&config.export_id) {
390        Ok(bytes) if bytes.len() == 16 => {}
391        Ok(bytes) => errors.push(format!("export_id should be 16 bytes, got {}", bytes.len())),
392        Err(e) => errors.push(format!("export_id is not valid base64: {}", e)),
393    }
394
395    // Validate base_nonce (base64, 12 bytes)
396    match BASE64_STANDARD.decode(&config.base_nonce) {
397        Ok(bytes) if bytes.len() == 12 => {}
398        Ok(bytes) => errors.push(format!(
399            "base_nonce should be 12 bytes, got {}",
400            bytes.len()
401        )),
402        Err(e) => errors.push(format!("base_nonce is not valid base64: {}", e)),
403    }
404
405    // Validate compression. The current encrypted archive format always emits
406    // deflate chunks, and the Rust decryptor always inflates chunks as deflate.
407    if config.compression != "deflate" {
408        errors.push(format!(
409            "compression must be 'deflate'; got '{}'. The current encrypted pages format supports only deflate.",
410            config.compression
411        ));
412    }
413
414    // Validate chunk_size
415    if config.payload.chunk_size == 0 {
416        errors.push("chunk_size cannot be zero".to_string());
417    }
418    if config.payload.chunk_size > MAX_CONFIG_CHUNK_SIZE {
419        errors.push(format!(
420            "chunk_size {} exceeds maximum {}",
421            config.payload.chunk_size, MAX_CONFIG_CHUNK_SIZE
422        ));
423    }
424
425    // Empty encrypted exports are valid: a zero-byte input produces an empty
426    // file list and the decryptors concatenate zero chunks into an empty DB
427    // byte buffer.
428
429    // Validate files list matches chunk_count
430    if config.payload.files.len() != config.payload.chunk_count {
431        errors.push(format!(
432            "files list length ({}) doesn't match chunk_count ({})",
433            config.payload.files.len(),
434            config.payload.chunk_count
435        ));
436    }
437
438    // Validate payload file paths (relative, under payload/, no parent traversal)
439    for (i, file) in config.payload.files.iter().enumerate() {
440        let path = Path::new(file);
441        if path.is_absolute() {
442            errors.push(format!("payload.files[{}] must be relative", i));
443        }
444        if path
445            .components()
446            .any(|c| matches!(c, std::path::Component::ParentDir))
447        {
448            errors.push(format!("payload.files[{}] must not contain '..'", i));
449        }
450        if !path.starts_with("payload") {
451            errors.push(format!("payload.files[{}] must reside under payload/", i));
452        }
453    }
454
455    // Validate key_slots
456    if config.key_slots.is_empty() {
457        errors.push("key_slots cannot be empty".to_string());
458    }
459
460    for (i, slot) in config.key_slots.iter().enumerate() {
461        // Validate slot.salt is base64
462        if BASE64_STANDARD.decode(&slot.salt).is_err() {
463            errors.push(format!("key_slot[{}].salt is not valid base64", i));
464        }
465
466        // Validate slot.wrapped_dek is base64
467        if BASE64_STANDARD.decode(&slot.wrapped_dek).is_err() {
468            errors.push(format!("key_slot[{}].wrapped_dek is not valid base64", i));
469        }
470
471        // Validate slot.nonce is base64
472        if BASE64_STANDARD.decode(&slot.nonce).is_err() {
473            errors.push(format!("key_slot[{}].nonce is not valid base64", i));
474        }
475    }
476
477    errors
478}
479
480fn validate_unencrypted_config(config: &UnencryptedConfig) -> Vec<String> {
481    let mut errors = Vec::new();
482
483    if config.encrypted {
484        errors.push("unencrypted config must set encrypted=false".to_string());
485    }
486
487    if config.version.trim().is_empty() {
488        errors.push("version cannot be empty".to_string());
489    }
490
491    if config.payload.path.trim().is_empty() {
492        errors.push("payload.path cannot be empty".to_string());
493    } else {
494        let path = Path::new(&config.payload.path);
495        validate_payload_path(&mut errors, "payload.path", path);
496    }
497
498    let valid_formats = ["sqlite"];
499    if !valid_formats.contains(&config.payload.format.as_str()) {
500        errors.push(format!(
501            "payload.format should be one of {:?}, got '{}'",
502            valid_formats, config.payload.format
503        ));
504    }
505
506    errors
507}
508
/// Applies the shared payload-path rules to `path`, pushing one message per
/// violation onto `errors`, each prefixed with `label`.
///
/// The rules, checked independently and in this order: the path must be
/// relative, must not contain a `..` component, and must start with the
/// `payload` component. Returns `true` when no rule was violated.
fn validate_payload_path(errors: &mut Vec<String>, label: &str, path: &Path) -> bool {
    let mut violations = 0usize;
    let mut report = |message: String| {
        errors.push(message);
        violations += 1;
    };

    if path.is_absolute() {
        report(format!("{label} must be relative"));
    }

    let has_parent_dir = path
        .components()
        .any(|part| part == std::path::Component::ParentDir);
    if has_parent_dir {
        report(format!("{label} must not contain '..'"));
    }

    if !path.starts_with("payload") {
        report(format!("{label} must reside under payload/"));
    }

    violations == 0
}
528
529/// Check payload manifest validity
530fn check_payload_manifest(site_dir: &Path) -> CheckResult {
531    let config_path = site_dir.join("config.json");
532    let payload_dir = site_dir.join("payload");
533
534    if !payload_dir.exists() {
535        return CheckResult::fail("payload/ directory not found");
536    }
537
538    // Parse config for expected payload
539    let config: ArchiveConfig = match File::open(&config_path)
540        .and_then(|f| Ok(serde_json::from_reader(BufReader::new(f))?))
541    {
542        Ok(c) => c,
543        Err(_) => return CheckResult::fail("Could not parse config.json"),
544    };
545
546    let mut errors = Vec::new();
547
548    match &config {
549        ArchiveConfig::Encrypted(enc) => {
550            // Check each expected chunk file exists
551            for (i, expected_file) in enc.payload.files.iter().enumerate() {
552                // Security: Verify filename follows expected pattern first (defense-in-depth)
553                // This also implicitly prevents path traversal since valid patterns are "payload/chunk-NNNNN.bin"
554                let expected_name = format!("payload/chunk-{:05}.bin", i);
555                if *expected_file != expected_name {
556                    errors.push(format!(
557                        "Chunk {} has unexpected name: {} (expected {})",
558                        i, expected_file, expected_name
559                    ));
560                    // Skip existence check for malformed paths to prevent path traversal
561                    continue;
562                }
563
564                let chunk_path = site_dir.join(expected_file);
565                match fs::symlink_metadata(&chunk_path) {
566                    Ok(meta) => {
567                        let file_type = meta.file_type();
568                        if file_type.is_symlink() {
569                            errors.push(format!("{expected_file} must not be a symlink"));
570                        } else if !file_type.is_file() {
571                            errors.push(format!("{expected_file} must be a regular file"));
572                        }
573                    }
574                    Err(_) => errors.push(format!("Missing chunk file: {}", expected_file)),
575                }
576            }
577
578            // Inventory chunk files to detect malformed names and out-of-range indices.
579            match fs::read_dir(&payload_dir) {
580                Ok(entries) => {
581                    for entry in entries {
582                        let entry = match entry {
583                            Ok(entry) => entry,
584                            Err(err) => {
585                                errors
586                                    .push(format!("Failed to read payload directory entry: {err}"));
587                                continue;
588                            }
589                        };
590                        let name = entry.file_name();
591                        let name_str = name.to_string_lossy();
592                        if !name_str.starts_with("chunk-") || !name_str.ends_with(".bin") {
593                            continue;
594                        }
595
596                        let Some(num_str) = name_str
597                            .strip_prefix("chunk-")
598                            .and_then(|s| s.strip_suffix(".bin"))
599                        else {
600                            errors.push(format!("Malformed chunk filename: {name_str}"));
601                            continue;
602                        };
603
604                        if num_str.len() < 5 || !num_str.chars().all(|c| c.is_ascii_digit()) {
605                            errors.push(format!("Malformed chunk filename: {name_str}"));
606                            continue;
607                        }
608
609                        let idx = match num_str.parse::<usize>() {
610                            Ok(idx) => idx,
611                            Err(_) => {
612                                errors.push(format!("Malformed chunk filename: {name_str}"));
613                                continue;
614                            }
615                        };
616
617                        if idx >= enc.payload.files.len() {
618                            errors.push(format!("Unexpected chunk file index: chunk-{idx:05}.bin"));
619                        }
620                    }
621                }
622                Err(err) => errors.push(format!("Failed to read payload/ directory: {err}")),
623            }
624        }
625        ArchiveConfig::Unencrypted(unenc) => {
626            let rel_path = Path::new(&unenc.payload.path);
627            if validate_payload_path(&mut errors, "payload.path", rel_path) {
628                let payload_path = site_dir.join(rel_path);
629                match fs::symlink_metadata(&payload_path) {
630                    Ok(meta) => {
631                        let file_type = meta.file_type();
632                        if file_type.is_symlink() {
633                            errors.push(format!("{} must not be a symlink", unenc.payload.path));
634                        } else if !file_type.is_file() {
635                            errors.push(format!("{} must be a regular file", unenc.payload.path));
636                        }
637                    }
638                    Err(_) => errors.push(format!("Missing payload file: {}", unenc.payload.path)),
639                }
640            }
641        }
642    }
643
644    if errors.is_empty() {
645        CheckResult::pass()
646    } else {
647        CheckResult::fail(errors.join("; "))
648    }
649}
650
651/// Check size limits for chunk files
652fn check_size_limits(site_dir: &Path) -> CheckResult {
653    let mut errors = Vec::new();
654
655    let config_path = site_dir.join("config.json");
656    let config: ArchiveConfig = match File::open(&config_path)
657        .context("Failed to open config.json")
658        .and_then(|f| serde_json::from_reader(BufReader::new(f)).context("Failed to parse JSON"))
659    {
660        Ok(c) => c,
661        Err(e) => {
662            return CheckResult::fail(format!("Failed to parse config.json: {}", e));
663        }
664    };
665
666    match &config {
667        ArchiveConfig::Encrypted(_) => {
668            let payload_dir = site_dir.join("payload");
669            if !payload_dir.is_dir() {
670                errors.push("payload/ directory not found for size check".to_string());
671            } else {
672                match fs::read_dir(&payload_dir) {
673                    Ok(entries) => {
674                        for entry in entries {
675                            let entry = match entry {
676                                Ok(entry) => entry,
677                                Err(err) => {
678                                    errors.push(format!(
679                                        "Failed to read payload directory entry: {err}"
680                                    ));
681                                    continue;
682                                }
683                            };
684                            let path = entry.path();
685                            if path.extension().map(|e| e == "bin").unwrap_or(false) {
686                                match fs::symlink_metadata(&path) {
687                                    Ok(meta) => {
688                                        let file_type = meta.file_type();
689                                        if file_type.is_symlink() {
690                                            errors.push(format!(
691                                                "{} must not be a symlink",
692                                                path.file_name()
693                                                    .unwrap_or_default()
694                                                    .to_string_lossy()
695                                            ));
696                                            continue;
697                                        }
698                                        if !file_type.is_file() {
699                                            errors.push(format!(
700                                                "{} must be a regular file",
701                                                path.file_name()
702                                                    .unwrap_or_default()
703                                                    .to_string_lossy()
704                                            ));
705                                            continue;
706                                        }
707                                        if meta.len() > MAX_CHUNK_SIZE {
708                                            errors.push(format!(
709                                                "{} exceeds 100MB limit ({} bytes)",
710                                                path.file_name()
711                                                    .unwrap_or_default()
712                                                    .to_string_lossy(),
713                                                meta.len()
714                                            ));
715                                        }
716                                    }
717                                    Err(err) => errors.push(format!(
718                                        "failed to stat {}: {}",
719                                        path.file_name().unwrap_or_default().to_string_lossy(),
720                                        err
721                                    )),
722                                }
723                            }
724                        }
725                    }
726                    Err(err) => errors.push(format!("Failed to read payload/ directory: {err}")),
727                }
728            }
729        }
730        ArchiveConfig::Unencrypted(unenc) => {
731            let payload_path = Path::new(&unenc.payload.path);
732            if validate_payload_path(&mut errors, "payload.path", payload_path) {
733                let payload_path = site_dir.join(payload_path);
734                if !payload_path.exists() {
735                    errors.push(format!(
736                        "payload file not found for size check: {}",
737                        unenc.payload.path
738                    ));
739                } else {
740                    match fs::symlink_metadata(&payload_path) {
741                        Ok(meta) => {
742                            let file_type = meta.file_type();
743                            if file_type.is_symlink() {
744                                errors
745                                    .push(format!("{} must not be a symlink", unenc.payload.path));
746                            } else if !file_type.is_file() {
747                                errors
748                                    .push(format!("{} must be a regular file", unenc.payload.path));
749                            } else if meta.len() > MAX_CHUNK_SIZE {
750                                errors.push(format!(
751                                    "{} exceeds 100MB limit ({} bytes)",
752                                    unenc.payload.path,
753                                    meta.len()
754                                ));
755                            }
756                        }
757                        Err(err) => errors.push(format!(
758                            "failed to stat payload file {}: {}",
759                            unenc.payload.path, err
760                        )),
761                    }
762                }
763            }
764        }
765    }
766
767    if errors.is_empty() {
768        CheckResult::pass()
769    } else {
770        CheckResult::fail(errors.join("; "))
771    }
772}
773
774/// Check integrity.json hashes match file contents
775fn check_integrity(site_dir: &Path, verbose: bool) -> CheckResult {
776    let integrity_path = site_dir.join("integrity.json");
777
778    let manifest: IntegrityManifest = match File::open(&integrity_path)
779        .context("Failed to open integrity.json")
780        .and_then(|f| serde_json::from_reader(BufReader::new(f)).context("Failed to parse JSON"))
781    {
782        Ok(m) => m,
783        Err(e) => return CheckResult::fail(format!("Failed to parse integrity.json: {}", e)),
784    };
785
786    let mut errors = Vec::new();
787    let mut checked_files: HashSet<String> = HashSet::new();
788    let canonical_site = match site_dir.canonicalize() {
789        Ok(path) => path,
790        Err(e) => {
791            return CheckResult::fail(format!(
792                "Failed to resolve site directory for integrity checks: {}",
793                e
794            ));
795        }
796    };
797
798    // Verify each file in manifest
799    for (rel_path, entry) in &manifest.files {
800        checked_files.insert(rel_path.clone());
801
802        if let Some(reason) = detect_encoded_path_violation(rel_path) {
803            errors.push(format!(
804                "integrity.json contains {reason} (security violation): {}",
805                rel_path
806            ));
807            continue;
808        }
809
810        // Security: Validate path doesn't escape site_dir via traversal
811        let path = Path::new(rel_path);
812        if path.is_absolute() {
813            errors.push(format!(
814                "integrity.json contains absolute path (security violation): {}",
815                rel_path
816            ));
817            continue;
818        }
819        if path
820            .components()
821            .any(|c| matches!(c, std::path::Component::ParentDir))
822        {
823            errors.push(format!(
824                "integrity.json contains path traversal (security violation): {}",
825                rel_path
826            ));
827            continue;
828        }
829
830        let file_path = site_dir.join(rel_path);
831        let metadata = match fs::symlink_metadata(&file_path) {
832            Ok(meta) => meta,
833            Err(_) => {
834                errors.push(format!("File in manifest but missing: {}", rel_path));
835                continue;
836            }
837        };
838
839        let file_type = metadata.file_type();
840
841        if !file_type.is_file() && !file_type.is_symlink() {
842            errors.push(format!(
843                "integrity.json references non-file entry (security violation): {}",
844                rel_path
845            ));
846            continue;
847        }
848
849        // Resolve symlinks and ensure final target remains within site_dir.
850        let canonical_file = match file_path.canonicalize() {
851            Ok(path) => path,
852            Err(_) => {
853                errors.push(format!("File in manifest but missing: {}", rel_path));
854                continue;
855            }
856        };
857        if !canonical_file.starts_with(&canonical_site) {
858            errors.push(format!(
859                "integrity.json path escapes site directory (security violation): {}",
860                rel_path
861            ));
862            continue;
863        }
864
865        // For symlinks, only permit links to regular files within site_dir.
866        if file_type.is_symlink() {
867            match fs::metadata(&file_path) {
868                Ok(target_meta) if target_meta.file_type().is_file() => {}
869                Ok(_) => {
870                    errors.push(format!(
871                        "integrity.json symlink target is not a regular file (security violation): {}",
872                        rel_path
873                    ));
874                    continue;
875                }
876                Err(e) => {
877                    errors.push(format!(
878                        "Failed to resolve symlink target for {}: {}",
879                        rel_path, e
880                    ));
881                    continue;
882                }
883            }
884        }
885
886        // Fast-fail on size mismatch before the expensive SHA256 hash.
887        // Use the canonical path so symlinks resolve to the actual target size.
888        if let Ok(actual_meta) = fs::metadata(&canonical_file)
889            && actual_meta.len() != entry.size
890        {
891            errors.push(format!(
892                "Size mismatch for {}: expected {}, got {}",
893                rel_path,
894                entry.size,
895                actual_meta.len()
896            ));
897            continue;
898        }
899
900        // Compute hash
901        let computed_hash = match compute_file_hash(&file_path) {
902            Ok(h) => h,
903            Err(e) => {
904                errors.push(format!("Failed to hash {}: {}", rel_path, e));
905                continue;
906            }
907        };
908
909        if computed_hash != entry.sha256 {
910            errors.push(format!(
911                "Hash mismatch for {}: expected {}, got {}",
912                rel_path, entry.sha256, computed_hash
913            ));
914        } else if verbose {
915            println!("    ✓ {}", rel_path);
916        }
917    }
918
919    // Check for extra files not in manifest
920    let actual_files = match collect_all_files(site_dir) {
921        Ok(files) => files,
922        Err(e) => return CheckResult::fail(format!("Failed to enumerate files: {}", e)),
923    };
924    for file in actual_files {
925        // Skip integrity.json itself
926        if file == "integrity.json" {
927            continue;
928        }
929        if !checked_files.contains(&file) {
930            errors.push(format!("File not in manifest: {}", file));
931        }
932    }
933
934    if errors.is_empty() {
935        CheckResult::pass()
936    } else {
937        CheckResult::fail(errors.join("; "))
938    }
939}
940
/// Reasons a path fails a single round of percent-decoding.
#[derive(Debug)]
enum PercentDecodeError {
    /// A `%` was not followed by exactly two hexadecimal digits.
    InvalidEncoding,
    /// The decoded bytes are not valid UTF-8.
    InvalidUtf8,
    /// The decoded bytes contain a `0x00` byte.
    NullByte,
}

impl fmt::Display for PercentDecodeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidEncoding => write!(f, "invalid percent-encoding"),
            Self::InvalidUtf8 => write!(f, "invalid UTF-8 after percent-decoding"),
            Self::NullByte => write!(f, "null byte in decoded path"),
        }
    }
}

/// Result of one percent-decoding pass.
#[derive(Debug)]
struct DecodeOutcome {
    /// The input with every `%XX` escape replaced by its byte value.
    decoded: String,
    /// `true` when at least one escape was decoded.
    changed: bool,
}

/// Decode exactly one layer of percent-encoding from `input`.
///
/// Every `%` must be followed by two ASCII hexadecimal digits; anything else
/// (truncated escape, non-hex characters, sign characters such as `+`) is
/// rejected with [`PercentDecodeError::InvalidEncoding`]. After decoding, the
/// output is rejected if it contains a null byte or is not valid UTF-8.
fn percent_decode_once(input: &str) -> Result<DecodeOutcome, PercentDecodeError> {
    // Strict single-digit parser. `u8::from_str_radix` is deliberately avoided
    // because it accepts a leading `+`, which would let `%+f` decode as 0x0F.
    let hex_digit = |byte: u8| char::from(byte).to_digit(16);

    let mut out = Vec::with_capacity(input.len());
    let mut changed = false;
    let mut rest = input.as_bytes();

    while let Some((&byte, tail)) = rest.split_first() {
        if byte != b'%' {
            out.push(byte);
            rest = tail;
            continue;
        }

        let (hi, lo) = match tail {
            [hi, lo, ..] => (hex_digit(*hi), hex_digit(*lo)),
            // Fewer than two bytes remain after the `%`.
            _ => return Err(PercentDecodeError::InvalidEncoding),
        };
        let (Some(hi), Some(lo)) = (hi, lo) else {
            return Err(PercentDecodeError::InvalidEncoding);
        };

        // Both digits are < 16, so the combined value always fits in a u8.
        out.push((hi * 16 + lo) as u8);
        changed = true;
        rest = &tail[2..];
    }

    if out.contains(&0) {
        return Err(PercentDecodeError::NullByte);
    }

    let decoded = String::from_utf8(out).map_err(|_| PercentDecodeError::InvalidUtf8)?;
    Ok(DecodeOutcome { decoded, changed })
}
997
/// Returns `true` when any `/`- or `\`-separated segment of `input` is exactly `..`.
fn contains_path_traversal_like(input: &str) -> bool {
    input
        .split(|sep: char| matches!(sep, '/' | '\\'))
        .any(|part| part == "..")
}
1001
/// Returns `true` when `input` textually looks like an absolute path.
///
/// Two shapes are recognised, treating `\` the same as `/`:
/// a leading separator (which also covers `//` and `\\` prefixes), or an
/// ASCII letter followed by `:` (drive-letter form such as `C:`).
fn is_absolute_like(input: &str) -> bool {
    match input.as_bytes() {
        [b'/' | b'\\', ..] => true,
        [drive, b':', ..] => drive.is_ascii_alphabetic(),
        _ => false,
    }
}
1010
/// Returns `true` when `input` contains any character from a fixed deny-list
/// of Unicode code points that resemble path-sensitive ASCII characters
/// (`.`, `/`, `\`), are invisible, or alter the displayed text direction.
///
/// Such characters could let a path slip past the ASCII-only textual checks
/// or be displayed misleadingly, so their mere presence is treated as hostile.
fn contains_unicode_path_attack(input: &str) -> bool {
    const SUSPICIOUS: &[char] = &[
        // Fullwidth look-alikes
        '\u{FF0E}', // FULLWIDTH FULL STOP
        '\u{FF0F}', // FULLWIDTH SOLIDUS
        '\u{FF3C}', // FULLWIDTH REVERSE SOLIDUS
        // Small form variants
        '\u{FE52}', // SMALL FULL STOP
        // Dot leaders / ellipsis components
        '\u{2024}', // ONE DOT LEADER
        // Halfwidth forms
        '\u{FF61}', // HALFWIDTH IDEOGRAPHIC FULL STOP
        // Combining overlays that can visually disguise a character
        '\u{0338}', // COMBINING LONG SOLIDUS OVERLAY
        '\u{0337}', // COMBINING SHORT SOLIDUS OVERLAY
        // Zero-width characters (invisible, could split tokens)
        '\u{200D}', // ZERO WIDTH JOINER
        '\u{200C}', // ZERO WIDTH NON-JOINER
        '\u{200B}', // ZERO WIDTH SPACE
        '\u{FEFF}', // BYTE ORDER MARK / ZERO WIDTH NO-BREAK SPACE
        // Bidirectional controls (can visually reorder a displayed path)
        '\u{202E}', // RIGHT-TO-LEFT OVERRIDE
        '\u{202D}', // LEFT-TO-RIGHT OVERRIDE
        '\u{202C}', // POP DIRECTIONAL FORMATTING
        '\u{202A}', // LEFT-TO-RIGHT EMBEDDING
        '\u{202B}', // RIGHT-TO-LEFT EMBEDDING
        '\u{2066}', // LEFT-TO-RIGHT ISOLATE
        '\u{2067}', // RIGHT-TO-LEFT ISOLATE
        '\u{2068}', // FIRST STRONG ISOLATE
        '\u{2069}', // POP DIRECTIONAL ISOLATE
        // Confusable slash characters
        '\u{2044}', // FRACTION SLASH
        '\u{2215}', // DIVISION SLASH
        '\u{29F8}', // BIG SOLIDUS
        '\u{1735}', // PHILIPPINE SINGLE PUNCTUATION
        // Confusable dot characters
        '\u{2E2E}', // REVERSED QUESTION MARK
        '\u{0701}', // SYRIAC SUPRALINEAR FULL STOP
        '\u{0702}', // SYRIAC SUBLINEAR FULL STOP
        '\u{A60E}', // VAI FULL STOP
        '\u{10A50}', // KHAROSHTHI PUNCTUATION DOT
    ];

    input.chars().any(|ch| SUSPICIOUS.contains(&ch))
}
1062
1063fn detect_encoded_path_violation(rel_path: &str) -> Option<String> {
1064    if contains_path_traversal_like(rel_path) {
1065        return Some("path traversal".to_string());
1066    }
1067    if is_absolute_like(rel_path) {
1068        return Some("absolute path".to_string());
1069    }
1070    if contains_unicode_path_attack(rel_path) {
1071        return Some("unicode normalization attack".to_string());
1072    }
1073
1074    if !rel_path.contains('%') {
1075        return None;
1076    }
1077
1078    let mut current = rel_path.to_string();
1079    for _ in 0..3 {
1080        let outcome = match percent_decode_once(&current) {
1081            Ok(o) => o,
1082            Err(e) => return Some(e.to_string()),
1083        };
1084        if !outcome.changed {
1085            break;
1086        }
1087        current = outcome.decoded;
1088        if contains_path_traversal_like(&current) {
1089            return Some("url-encoded path traversal".to_string());
1090        }
1091        if is_absolute_like(&current) {
1092            return Some("url-encoded absolute path".to_string());
1093        }
1094        if contains_unicode_path_attack(&current) {
1095            return Some("url-encoded unicode normalization attack".to_string());
1096        }
1097        if !current.contains('%') {
1098            break;
1099        }
1100    }
1101
1102    None
1103}
1104
1105/// Check for secret leakage in site/
1106fn check_no_secrets(site_dir: &Path) -> CheckResult {
1107    let mut errors = Vec::new();
1108
1109    // Check for forbidden files
1110    for file in SECRET_FILES {
1111        let path = site_dir.join(file);
1112        if fs::symlink_metadata(&path).is_ok() {
1113            errors.push(format!("Secret file found in site/: {}", file));
1114        }
1115    }
1116
1117    // Check for forbidden directories
1118    for dir in SECRET_DIRS {
1119        let path = site_dir.join(dir);
1120        if let Ok(metadata) = fs::symlink_metadata(&path) {
1121            let file_type = metadata.file_type();
1122            if file_type.is_dir() || file_type.is_symlink() {
1123                errors.push(format!("Secret directory found in site/: {}/", dir));
1124            }
1125        }
1126    }
1127
1128    // Recursive scan: detect secret files/dirs hidden in subdirectories
1129    find_secrets_recursive(site_dir, site_dir, &mut errors);
1130
1131    // Check config.json doesn't contain plaintext secrets.
1132    // Walk the parsed JSON tree instead of doing brittle raw substring checks so
1133    // formatting changes like `"secret" : "..."` or nested objects can't hide leakage.
1134    let config_path = site_dir.join("config.json");
1135    if config_path.exists()
1136        && let Ok(content) = fs::read_to_string(&config_path)
1137        && let Ok(config_json) = serde_json::from_str::<Value>(&content)
1138    {
1139        find_forbidden_config_keys(&config_json, "", &mut errors);
1140    }
1141
1142    if errors.is_empty() {
1143        CheckResult::pass()
1144    } else {
1145        CheckResult::fail(errors.join("; "))
1146    }
1147}
1148
1149fn find_forbidden_config_keys(value: &Value, current_path: &str, findings: &mut Vec<String>) {
1150    match value {
1151        Value::Object(map) => {
1152            for (key, child) in map {
1153                let child_path = if current_path.is_empty() {
1154                    key.clone()
1155                } else {
1156                    format!("{current_path}.{key}")
1157                };
1158                if let Some((_, description)) = FORBIDDEN_CONFIG_KEYS
1159                    .iter()
1160                    .find(|(forbidden, _)| key.eq_ignore_ascii_case(forbidden))
1161                {
1162                    findings.push(format!(
1163                        "config.json contains forbidden field: {} at {}",
1164                        description, child_path
1165                    ));
1166                }
1167                find_forbidden_config_keys(child, &child_path, findings);
1168            }
1169        }
1170        Value::Array(items) => {
1171            for (idx, child) in items.iter().enumerate() {
1172                let child_path = if current_path.is_empty() {
1173                    format!("[{idx}]")
1174                } else {
1175                    format!("{current_path}[{idx}]")
1176                };
1177                find_forbidden_config_keys(child, &child_path, findings);
1178            }
1179        }
1180        _ => {}
1181    }
1182}
1183
1184/// Recursively scan a directory tree for secret files and directories.
1185/// Finds entries whose name (not full path) matches SECRET_FILES or SECRET_DIRS
1186/// at any depth, catching secrets hidden in subdirectories.
1187fn find_secrets_recursive(base: &Path, current: &Path, findings: &mut Vec<String>) {
1188    let entries = match fs::read_dir(current) {
1189        Ok(entries) => entries,
1190        Err(_) => return,
1191    };
1192
1193    for entry in entries.flatten() {
1194        let path = entry.path();
1195        let file_type = match entry.file_type() {
1196            Ok(file_type) => file_type,
1197            Err(_) => continue,
1198        };
1199        let name = match entry.file_name().to_str() {
1200            Some(n) => n.to_string(),
1201            None => continue,
1202        };
1203        let is_secret_file = SECRET_FILES.contains(&name.as_str());
1204        let is_secret_dir = SECRET_DIRS.contains(&name.as_str());
1205
1206        let rel_path = path
1207            .strip_prefix(base)
1208            .unwrap_or(&path)
1209            .to_string_lossy()
1210            .replace('\\', "/");
1211
1212        if file_type.is_dir() {
1213            if is_secret_dir {
1214                // Skip if this is a top-level match (already caught above)
1215                if current != base {
1216                    findings.push(format!(
1217                        "Secret directory found in site subdirectory: {}/",
1218                        rel_path
1219                    ));
1220                }
1221            }
1222            // Only recurse into real directories. Symlinked directories are handled below
1223            // so a malicious or accidental loop cannot drag verification outside site/.
1224            find_secrets_recursive(base, &path, findings);
1225        } else if file_type.is_symlink() {
1226            if is_secret_dir {
1227                if current != base {
1228                    findings.push(format!(
1229                        "Secret directory found in site subdirectory: {}/",
1230                        rel_path
1231                    ));
1232                }
1233            } else if is_secret_file && current != base {
1234                findings.push(format!(
1235                    "Secret file found in site subdirectory: {}",
1236                    rel_path
1237                ));
1238            }
1239        } else if file_type.is_file() && is_secret_file {
1240            // Skip if this is a top-level match (already caught above)
1241            if current != base {
1242                findings.push(format!(
1243                    "Secret file found in site subdirectory: {}",
1244                    rel_path
1245                ));
1246            }
1247        }
1248    }
1249}
1250
1251/// Compute SHA256 hash of a file
1252fn compute_file_hash(path: &Path) -> Result<String> {
1253    let file = File::open(path)?;
1254    let mut reader = BufReader::new(file);
1255    let mut hasher = Sha256::new();
1256    let mut buffer = [0u8; 8192];
1257
1258    loop {
1259        let bytes_read = reader.read(&mut buffer)?;
1260        if bytes_read == 0 {
1261            break;
1262        }
1263        hasher.update(&buffer[..bytes_read]);
1264    }
1265
1266    // sha2 ≥ 0.11 dropped `LowerHex` on the digest output;
1267    // `hex::encode` produces the same lowercase-hex representation.
1268    Ok(hex::encode(hasher.finalize()))
1269}
1270
1271/// Collect all files in a directory recursively
1272fn collect_all_files(dir: &Path) -> Result<Vec<String>> {
1273    let mut files = Vec::new();
1274    collect_files_recursive(dir, dir, &mut files)?;
1275    Ok(files)
1276}
1277
1278fn collect_files_recursive(base: &Path, current: &Path, files: &mut Vec<String>) -> Result<()> {
1279    for entry in fs::read_dir(current)? {
1280        let entry = entry?;
1281        let path = entry.path();
1282        let metadata = fs::symlink_metadata(&path)?;
1283        let file_type = metadata.file_type();
1284
1285        if file_type.is_symlink() {
1286            if let Ok(rel) = path.strip_prefix(base) {
1287                files.push(rel.to_string_lossy().replace('\\', "/"));
1288            }
1289            continue;
1290        }
1291
1292        if file_type.is_dir() {
1293            collect_files_recursive(base, &path, files)?;
1294        } else if file_type.is_file()
1295            && let Ok(rel) = path.strip_prefix(base)
1296        {
1297            files.push(rel.to_string_lossy().replace('\\', "/"));
1298        }
1299    }
1300    Ok(())
1301}
1302
1303/// Calculate total size of a directory
1304fn calculate_dir_size(dir: &Path) -> Result<u64> {
1305    let mut total = 0u64;
1306
1307    fn calc_recursive(path: &Path, total: &mut u64) -> Result<()> {
1308        let metadata = fs::symlink_metadata(path)?;
1309        let file_type = metadata.file_type();
1310
1311        if file_type.is_symlink() {
1312            return Ok(());
1313        }
1314
1315        if file_type.is_dir() {
1316            for entry in fs::read_dir(path)? {
1317                calc_recursive(&entry?.path(), total)?;
1318            }
1319        } else if file_type.is_file() {
1320            *total += metadata.len();
1321        }
1322        Ok(())
1323    }
1324
1325    calc_recursive(dir, &mut total)?;
1326    Ok(total)
1327}
1328
1329/// Print verification result in human-readable format
1330pub fn print_result(result: &VerifyResult, verbose: bool) {
1331    let status_icon = if result.status == "valid" {
1332        "✓"
1333    } else {
1334        "✗"
1335    };
1336    println!(
1337        "\n{} Bundle status: {}",
1338        status_icon,
1339        result.status.to_uppercase()
1340    );
1341
1342    println!("\nChecks:");
1343    print_check("  Required files", &result.checks.required_files, verbose);
1344    print_check("  Config schema", &result.checks.config_schema, verbose);
1345    print_check(
1346        "  Payload manifest",
1347        &result.checks.payload_manifest,
1348        verbose,
1349    );
1350    print_check("  Size limits", &result.checks.size_limits, verbose);
1351    print_check("  Integrity", &result.checks.integrity, verbose);
1352    print_check("  No secrets", &result.checks.no_secrets_in_site, verbose);
1353
1354    if !result.warnings.is_empty() {
1355        println!("\nWarnings:");
1356        for warning in &result.warnings {
1357            println!("  ⚠ {}", warning);
1358        }
1359    }
1360
1361    println!(
1362        "\nTotal site size: {} bytes ({:.2} MB)",
1363        result.site_size_bytes,
1364        result.site_size_bytes as f64 / (1024.0 * 1024.0)
1365    );
1366}
1367
1368fn print_check(name: &str, result: &CheckResult, verbose: bool) {
1369    let icon = if result.passed { "✓" } else { "✗" };
1370    print!("{}: {} ", name, icon);
1371
1372    if result.passed {
1373        println!("OK");
1374    } else if let Some(details) = &result.details {
1375        if verbose {
1376            println!("FAILED");
1377            println!("      {}", details);
1378        } else {
1379            // Truncate long error messages (char-safe slicing)
1380            let display = if details.chars().count() > 60 {
1381                let truncated: String = details.chars().take(60).collect();
1382                format!("{truncated}...")
1383            } else {
1384                details.clone()
1385            };
1386            println!("FAILED: {}", display);
1387        }
1388    } else {
1389        println!("FAILED");
1390    }
1391}
1392
1393#[cfg(test)]
1394mod tests {
1395    use super::*;
1396    use crate::pages::bundle::IntegrityEntry;
1397    use std::collections::BTreeMap;
1398    use std::path::PathBuf;
1399    use tempfile::TempDir;
1400
1401    /// Path to the pages_verify fixtures directory
1402    fn fixtures_dir() -> PathBuf {
1403        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/pages_verify")
1404    }
1405
1406    /// Copy a fixture directory to the destination.
1407    /// `fixture_name` is the subdirectory under tests/fixtures/pages_verify/ (e.g., "valid", "unencrypted")
1408    fn copy_fixture(fixture_name: &str, dest: &Path) -> Result<()> {
1409        let src = fixtures_dir().join(fixture_name).join("site");
1410        copy_dir_recursive(&src, dest)
1411    }
1412
1413    /// Recursively copy a directory and its contents
1414    fn copy_dir_recursive(src: &Path, dest: &Path) -> Result<()> {
1415        if !dest.exists() {
1416            fs::create_dir_all(dest)?;
1417        }
1418        for entry in fs::read_dir(src)? {
1419            let entry = entry?;
1420            let file_type = entry.file_type()?;
1421            let dest_path = dest.join(entry.file_name());
1422            if file_type.is_dir() {
1423                copy_dir_recursive(&entry.path(), &dest_path)?;
1424            } else {
1425                fs::copy(entry.path(), &dest_path)?;
1426            }
1427        }
1428        Ok(())
1429    }
1430
1431    fn assert_integrity_path_blocked(rel_path: &str) {
1432        let temp = TempDir::new().unwrap();
1433        let site_dir = temp.path();
1434
1435        let mut files = BTreeMap::new();
1436        files.insert(
1437            rel_path.to_string(),
1438            IntegrityEntry {
1439                sha256: "deadbeef".repeat(8),
1440                size: 100,
1441            },
1442        );
1443        let manifest = IntegrityManifest {
1444            version: 1,
1445            generated_at: "2025-01-01T00:00:00Z".to_string(),
1446            files,
1447        };
1448        let manifest_json = serde_json::to_string(&manifest).unwrap();
1449        fs::write(site_dir.join("integrity.json"), manifest_json).unwrap();
1450
1451        let result = check_integrity(site_dir, false);
1452        assert!(!result.passed, "Path should be blocked: {rel_path}");
1453        assert!(
1454            result
1455                .details
1456                .as_ref()
1457                .map(|d| d.contains("security violation"))
1458                .unwrap_or(false),
1459            "Should mention security violation"
1460        );
1461    }
1462
1463    #[test]
1464    #[cfg(unix)]
1465    fn test_collect_all_files_lists_symlink_without_recursing() {
1466        use std::os::unix::fs::symlink;
1467
1468        let temp = TempDir::new().unwrap();
1469        let outside = TempDir::new().unwrap();
1470
1471        fs::write(temp.path().join("root.txt"), "root").unwrap();
1472        fs::create_dir_all(outside.path().join("nested")).unwrap();
1473        fs::write(outside.path().join("nested/hidden.txt"), "hidden").unwrap();
1474        symlink(
1475            outside.path().join("nested"),
1476            temp.path().join("linked-dir"),
1477        )
1478        .unwrap();
1479
1480        let files = collect_all_files(temp.path()).unwrap();
1481        assert!(files.contains(&"root.txt".to_string()));
1482        assert!(files.contains(&"linked-dir".to_string()));
1483        assert!(!files.iter().any(|f| f.starts_with("linked-dir/")));
1484    }
1485
1486    #[test]
1487    #[cfg(unix)]
1488    fn test_calculate_dir_size_skips_symlink_targets() {
1489        use std::os::unix::fs::symlink;
1490
1491        let temp = TempDir::new().unwrap();
1492        let outside = TempDir::new().unwrap();
1493
1494        fs::write(temp.path().join("small.txt"), vec![0u8; 8]).unwrap();
1495        fs::write(outside.path().join("large.bin"), vec![0u8; 8192]).unwrap();
1496        symlink(
1497            outside.path().join("large.bin"),
1498            temp.path().join("linked.bin"),
1499        )
1500        .unwrap();
1501
1502        let size = calculate_dir_size(temp.path()).unwrap();
1503        assert_eq!(size, 8);
1504    }
1505
1506    #[test]
1507    #[cfg(unix)]
1508    fn test_integrity_rejects_symlink_manifest_entry_to_directory() {
1509        use std::os::unix::fs::symlink;
1510
1511        let temp = TempDir::new().unwrap();
1512        let site_dir = temp.path();
1513        fs::create_dir_all(site_dir.join("payload/real-dir")).unwrap();
1514        fs::write(site_dir.join("payload/real-dir/content.txt"), b"payload").unwrap();
1515        symlink(
1516            site_dir.join("payload/real-dir"),
1517            site_dir.join("payload/alias-dir"),
1518        )
1519        .unwrap();
1520
1521        let mut files = BTreeMap::new();
1522        files.insert(
1523            "payload/alias-dir".to_string(),
1524            IntegrityEntry {
1525                // Hash/size are irrelevant here; verification should fail before hashing.
1526                sha256: "deadbeef".repeat(8),
1527                size: 0,
1528            },
1529        );
1530        let manifest = IntegrityManifest {
1531            version: 1,
1532            generated_at: "2025-01-01T00:00:00Z".to_string(),
1533            files,
1534        };
1535        fs::write(
1536            site_dir.join("integrity.json"),
1537            serde_json::to_string(&manifest).unwrap(),
1538        )
1539        .unwrap();
1540
1541        let result = check_integrity(site_dir, false);
1542        assert!(!result.passed);
1543        assert!(
1544            result
1545                .details
1546                .as_ref()
1547                .map(|d| d.contains("not a regular file"))
1548                .unwrap_or(false)
1549        );
1550    }
1551
1552    #[test]
1553    fn test_verify_minimal_valid_site() {
1554        let temp = TempDir::new().unwrap();
1555        let site_dir = temp.path().join("site");
1556
1557        // Copy the valid fixture to temp directory
1558        copy_fixture("valid", &site_dir).unwrap();
1559
1560        let result = verify_bundle(&site_dir, true).unwrap();
1561
1562        // Debug: print which checks failed
1563        if !result.checks.required_files.passed {
1564            eprintln!(
1565                "FAILED: required_files - {:?}",
1566                result.checks.required_files.details
1567            );
1568        }
1569        if !result.checks.config_schema.passed {
1570            eprintln!(
1571                "FAILED: config_schema - {:?}",
1572                result.checks.config_schema.details
1573            );
1574        }
1575        if !result.checks.payload_manifest.passed {
1576            eprintln!(
1577                "FAILED: payload_manifest - {:?}",
1578                result.checks.payload_manifest.details
1579            );
1580        }
1581        if !result.checks.size_limits.passed {
1582            eprintln!(
1583                "FAILED: size_limits - {:?}",
1584                result.checks.size_limits.details
1585            );
1586        }
1587        if !result.checks.integrity.passed {
1588            eprintln!("FAILED: integrity - {:?}", result.checks.integrity.details);
1589        }
1590        if !result.checks.no_secrets_in_site.passed {
1591            eprintln!(
1592                "FAILED: no_secrets_in_site - {:?}",
1593                result.checks.no_secrets_in_site.details
1594            );
1595        }
1596
1597        assert_eq!(result.status, "valid");
1598        assert!(result.checks.required_files.passed);
1599        assert!(result.checks.config_schema.passed);
1600    }
1601
1602    #[test]
1603    fn test_config_schema_allows_zero_chunk_encrypted_archive() {
1604        let temp = TempDir::new().unwrap();
1605        let site_dir = temp.path().join("site");
1606        fs::create_dir_all(&site_dir).unwrap();
1607
1608        let config = r#"{
1609          "version": 2,
1610          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
1611          "base_nonce": "AAAAAAAAAAAAAAAA",
1612          "compression": "deflate",
1613          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
1614          "payload": {
1615            "chunk_size": 1024,
1616            "chunk_count": 0,
1617            "total_compressed_size": 0,
1618            "total_plaintext_size": 0,
1619            "files": []
1620          },
1621          "key_slots": [{
1622            "id": 0,
1623            "slot_type": "password",
1624            "kdf": "argon2id",
1625            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
1626            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
1627            "nonce": "AAAAAAAAAAAAAAAA",
1628            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
1629          }]
1630        }"#;
1631        fs::write(site_dir.join("config.json"), config).unwrap();
1632
1633        let result = check_config_schema(&site_dir);
1634        assert!(
1635            result.passed,
1636            "zero-chunk encrypted config should match Rust/worker validators: {:?}",
1637            result.details
1638        );
1639    }
1640
1641    #[test]
1642    fn test_verify_unencrypted_site() {
1643        let temp = TempDir::new().unwrap();
1644        let site_dir = temp.path().join("site");
1645
1646        // Copy the unencrypted fixture to temp directory
1647        copy_fixture("unencrypted", &site_dir).unwrap();
1648
1649        let result = verify_bundle(&site_dir, true).unwrap();
1650        assert!(result.checks.config_schema.passed);
1651        assert!(result.checks.payload_manifest.passed);
1652        assert_eq!(result.status, "valid");
1653    }
1654
1655    #[test]
1656    fn test_verify_missing_required_files() {
1657        let temp = TempDir::new().unwrap();
1658        let site_dir = temp.path().join("site");
1659
1660        // Copy the missing_required_no_viewer fixture (missing viewer.js)
1661        copy_fixture("missing_required_no_viewer", &site_dir).unwrap();
1662
1663        let result = verify_bundle(&site_dir, false).unwrap();
1664        assert_eq!(result.status, "invalid");
1665        assert!(!result.checks.required_files.passed);
1666    }
1667
1668    #[test]
1669    fn test_verify_rejects_required_file_replaced_by_directory() {
1670        let temp = TempDir::new().unwrap();
1671        let site_dir = temp.path().join("site");
1672        let viewer_backup = temp.path().join("viewer.js.backup");
1673
1674        copy_fixture("valid", &site_dir).unwrap();
1675        fs::rename(site_dir.join("viewer.js"), &viewer_backup).unwrap();
1676        fs::create_dir(site_dir.join("viewer.js")).unwrap();
1677
1678        let mut manifest: IntegrityManifest = serde_json::from_reader(BufReader::new(
1679            File::open(site_dir.join("integrity.json")).unwrap(),
1680        ))
1681        .unwrap();
1682        manifest.files.remove("viewer.js");
1683        fs::write(
1684            site_dir.join("integrity.json"),
1685            serde_json::to_string(&manifest).unwrap(),
1686        )
1687        .unwrap();
1688
1689        let result = verify_bundle(&site_dir, false).unwrap();
1690        assert_eq!(result.status, "invalid");
1691        assert!(!result.checks.required_files.passed);
1692        assert!(
1693            result
1694                .checks
1695                .required_files
1696                .details
1697                .as_ref()
1698                .map(|details| details.contains("viewer.js (must be a regular file)"))
1699                .unwrap_or(false),
1700            "required file directories should be rejected: {:?}",
1701            result.checks.required_files.details
1702        );
1703    }
1704
1705    #[test]
1706    fn test_verify_invalid_config() {
1707        let temp = TempDir::new().unwrap();
1708        let site_dir = temp.path().join("site");
1709
1710        // Copy valid fixture then overwrite config with invalid one
1711        copy_fixture("valid", &site_dir).unwrap();
1712
1713        // Write invalid config
1714        fs::write(
1715            site_dir.join("config.json"),
1716            r#"{"version": 2, "export_id": "invalid"}"#,
1717        )
1718        .unwrap();
1719
1720        let result = verify_bundle(&site_dir, false).unwrap();
1721        assert!(!result.checks.config_schema.passed);
1722    }
1723
1724    #[test]
1725    fn test_verify_rejects_unsupported_encrypted_compression() {
1726        for compression in ["zstd", "none"] {
1727            let temp = TempDir::new().unwrap();
1728            let site_dir = temp.path().join("site");
1729
1730            copy_fixture("valid", &site_dir).unwrap();
1731            let config_path = site_dir.join("config.json");
1732            let mut config: Value =
1733                serde_json::from_str(&fs::read_to_string(&config_path).unwrap()).unwrap();
1734            config["compression"] = Value::String(compression.to_string());
1735            fs::write(&config_path, serde_json::to_string_pretty(&config).unwrap()).unwrap();
1736
1737            let result = check_config_schema(&site_dir);
1738
1739            assert!(
1740                !result.passed,
1741                "{compression} should fail schema validation"
1742            );
1743            let details = result.details.unwrap_or_default();
1744            assert!(
1745                details.contains("supports only deflate") && details.contains(compression),
1746                "unexpected validation details for {compression}: {details}"
1747            );
1748        }
1749    }
1750
1751    #[test]
1752    fn test_verify_rejects_unsupported_encrypted_schema_version() {
1753        let temp = TempDir::new().unwrap();
1754        let site_dir = temp.path().join("site");
1755
1756        copy_fixture("valid", &site_dir).unwrap();
1757        let config_path = site_dir.join("config.json");
1758        let mut config: Value =
1759            serde_json::from_str(&fs::read_to_string(&config_path).unwrap()).unwrap();
1760        config["version"] = Value::from(1);
1761        fs::write(&config_path, serde_json::to_string_pretty(&config).unwrap()).unwrap();
1762
1763        let result = check_config_schema(&site_dir);
1764
1765        assert!(!result.passed, "unsupported schema version should fail");
1766        let details = result.details.unwrap_or_default();
1767        assert!(
1768            details.contains("version must be 2") && details.contains("got 1"),
1769            "unexpected validation details: {details}"
1770        );
1771    }
1772
1773    #[test]
1774    fn test_verify_rejects_unknown_config_fields() {
1775        let temp = TempDir::new().unwrap();
1776        let site_dir = temp.path().join("site");
1777
1778        copy_fixture("valid", &site_dir).unwrap();
1779        fs::write(
1780            site_dir.join("config.json"),
1781            r#"{
1782                "encrypted": false,
1783                "version": "1.0",
1784                "payload": {
1785                    "path": "payload/data.sqlite",
1786                    "format": "sqlite"
1787                },
1788                "totally_unknown_field": 123
1789            }"#,
1790        )
1791        .unwrap();
1792
1793        let result = verify_bundle(&site_dir, false).unwrap();
1794        assert!(!result.checks.config_schema.passed);
1795        assert!(
1796            result
1797                .checks
1798                .config_schema
1799                .details
1800                .as_ref()
1801                .map(|details| details.contains("unknown field"))
1802                .unwrap_or(false),
1803            "unknown config fields should fail schema validation: {:?}",
1804            result.checks.config_schema.details
1805        );
1806    }
1807
1808    #[test]
1809    fn test_verify_secret_leakage() {
1810        let temp = TempDir::new().unwrap();
1811        let site_dir = temp.path().join("site");
1812
1813        // Copy the secret_leak fixture (contains recovery-secret.txt)
1814        copy_fixture("secret_leak", &site_dir).unwrap();
1815
1816        let result = verify_bundle(&site_dir, false).unwrap();
1817        assert!(!result.checks.no_secrets_in_site.passed);
1818    }
1819
1820    #[test]
1821    fn test_check_no_secrets_flags_nested_config_secret_key_with_whitespace() {
1822        let temp = TempDir::new().unwrap();
1823        let site_dir = temp.path().join("site");
1824        fs::create_dir_all(&site_dir).unwrap();
1825        fs::write(
1826            site_dir.join("config.json"),
1827            r#"{
1828                "encrypted": false,
1829                "version": "1.0",
1830                "payload": { "path": "payload/data.sqlite", "format": "sqlite" },
1831                "metadata": { "secret" : "leaked" }
1832            }"#,
1833        )
1834        .unwrap();
1835
1836        let result = check_no_secrets(&site_dir);
1837        assert!(!result.passed);
1838        assert!(
1839            result
1840                .details
1841                .as_ref()
1842                .map(|details| {
1843                    details.contains(
1844                        "config.json contains forbidden field: secret field at metadata.secret",
1845                    )
1846                })
1847                .unwrap_or(false),
1848            "nested secret key with whitespace should be detected: {:?}",
1849            result.details
1850        );
1851    }
1852
1853    #[test]
1854    fn test_check_no_secrets_flags_forbidden_config_key_inside_array() {
1855        let temp = TempDir::new().unwrap();
1856        let site_dir = temp.path().join("site");
1857        fs::create_dir_all(&site_dir).unwrap();
1858        fs::write(
1859            site_dir.join("config.json"),
1860            r#"{
1861                "encrypted": false,
1862                "version": "1.0",
1863                "payload": { "path": "payload/data.sqlite", "format": "sqlite" },
1864                "metadata": [{ "private_key" : "leaked" }]
1865            }"#,
1866        )
1867        .unwrap();
1868
1869        let result = check_no_secrets(&site_dir);
1870        assert!(!result.passed);
1871        assert!(
1872            result
1873                .details
1874                .as_ref()
1875                .map(|details| {
1876                    details.contains(
1877                        "config.json contains forbidden field: private_key field at metadata[0].private_key",
1878                    )
1879                })
1880                .unwrap_or(false),
1881            "forbidden key inside arrays should be detected: {:?}",
1882            result.details
1883        );
1884    }
1885
1886    #[test]
1887    #[cfg(unix)]
1888    fn test_check_no_secrets_does_not_follow_symlinked_directories() {
1889        use std::os::unix::fs::symlink;
1890
1891        let temp = TempDir::new().unwrap();
1892        let site_dir = temp.path().join("site");
1893        let outside_dir = temp.path().join("outside");
1894        fs::create_dir_all(&site_dir).unwrap();
1895        fs::create_dir_all(outside_dir.join("private")).unwrap();
1896        fs::write(outside_dir.join("private/recovery-secret.txt"), "secret").unwrap();
1897        symlink(&outside_dir, site_dir.join("linked-assets")).unwrap();
1898
1899        let result = check_no_secrets(&site_dir);
1900        assert!(
1901            result.passed,
1902            "symlink targets outside site/ should not be scanned as in-tree secrets: {:?}",
1903            result.details
1904        );
1905    }
1906
1907    #[test]
1908    #[cfg(unix)]
1909    fn test_check_no_secrets_flags_secret_named_symlink_without_recursing() {
1910        use std::os::unix::fs::symlink;
1911
1912        let temp = TempDir::new().unwrap();
1913        let site_dir = temp.path().join("site");
1914        let benign_dir = temp.path().join("benign");
1915        fs::create_dir_all(site_dir.join("nested")).unwrap();
1916        fs::create_dir_all(&benign_dir).unwrap();
1917        symlink(&benign_dir, site_dir.join("nested/private")).unwrap();
1918
1919        let result = check_no_secrets(&site_dir);
1920        assert!(!result.passed);
1921        assert!(
1922            result
1923                .details
1924                .as_ref()
1925                .map(|details| {
1926                    details.contains("Secret directory found in site subdirectory: nested/private/")
1927                })
1928                .unwrap_or(false),
1929            "secret-named symlink should still be reported: {:?}",
1930            result.details
1931        );
1932    }
1933
1934    #[test]
1935    #[cfg(unix)]
1936    fn test_check_no_secrets_flags_top_level_secret_file_broken_symlink() {
1937        use std::os::unix::fs::symlink;
1938
1939        let temp = TempDir::new().unwrap();
1940        let site_dir = temp.path().join("site");
1941        fs::create_dir_all(&site_dir).unwrap();
1942        symlink(
1943            temp.path().join("missing-recovery-secret"),
1944            site_dir.join("recovery-secret.txt"),
1945        )
1946        .unwrap();
1947
1948        let result = check_no_secrets(&site_dir);
1949        assert!(!result.passed);
1950        assert!(
1951            result
1952                .details
1953                .as_ref()
1954                .map(|details| details.contains("Secret file found in site/: recovery-secret.txt"))
1955                .unwrap_or(false),
1956            "top-level dangling secret symlink should still be reported: {:?}",
1957            result.details
1958        );
1959    }
1960
1961    #[test]
1962    #[cfg(unix)]
1963    fn test_check_no_secrets_flags_top_level_secret_dir_broken_symlink() {
1964        use std::os::unix::fs::symlink;
1965
1966        let temp = TempDir::new().unwrap();
1967        let site_dir = temp.path().join("site");
1968        fs::create_dir_all(&site_dir).unwrap();
1969        symlink(
1970            temp.path().join("missing-private"),
1971            site_dir.join("private"),
1972        )
1973        .unwrap();
1974
1975        let result = check_no_secrets(&site_dir);
1976        assert!(!result.passed);
1977        assert!(
1978            result
1979                .details
1980                .as_ref()
1981                .map(|details| details.contains("Secret directory found in site/: private/"))
1982                .unwrap_or(false),
1983            "top-level dangling private symlink should still be reported: {:?}",
1984            result.details
1985        );
1986    }
1987
1988    #[test]
1989    fn test_verify_with_integrity() {
1990        let temp = TempDir::new().unwrap();
1991        let site_dir = temp.path().join("site");
1992
1993        // Copy valid fixture
1994        copy_fixture("valid", &site_dir).unwrap();
1995
1996        // Create integrity.json
1997        let mut files = BTreeMap::new();
1998        for file in REQUIRED_FILES {
1999            let hash = compute_file_hash(&site_dir.join(file)).unwrap();
2000            let size = fs::metadata(site_dir.join(file)).unwrap().len();
2001            files.insert(file.to_string(), IntegrityEntry { sha256: hash, size });
2002        }
2003        // Add payload chunk
2004        let chunk_hash = compute_file_hash(&site_dir.join("payload/chunk-00000.bin")).unwrap();
2005        let chunk_size = fs::metadata(site_dir.join("payload/chunk-00000.bin"))
2006            .unwrap()
2007            .len();
2008        files.insert(
2009            "payload/chunk-00000.bin".to_string(),
2010            IntegrityEntry {
2011                sha256: chunk_hash,
2012                size: chunk_size,
2013            },
2014        );
2015
2016        let manifest = IntegrityManifest {
2017            version: 1,
2018            generated_at: "2024-01-01T00:00:00Z".to_string(),
2019            files,
2020        };
2021        fs::write(
2022            site_dir.join("integrity.json"),
2023            serde_json::to_string_pretty(&manifest).unwrap(),
2024        )
2025        .unwrap();
2026
2027        let result = verify_bundle(&site_dir, false).unwrap();
2028        assert!(result.checks.integrity.passed);
2029    }
2030
2031    #[test]
2032    fn test_verify_integrity_mismatch() {
2033        let temp = TempDir::new().unwrap();
2034        let site_dir = temp.path().join("site");
2035
2036        // Copy valid fixture
2037        copy_fixture("valid", &site_dir).unwrap();
2038
2039        // Create integrity.json with wrong hash
2040        let mut files = BTreeMap::new();
2041        files.insert(
2042            "index.html".to_string(),
2043            IntegrityEntry {
2044                sha256: "0000000000000000000000000000000000000000000000000000000000000000"
2045                    .to_string(),
2046                size: 10,
2047            },
2048        );
2049
2050        let manifest = IntegrityManifest {
2051            version: 1,
2052            generated_at: "2024-01-01T00:00:00Z".to_string(),
2053            files,
2054        };
2055        fs::write(
2056            site_dir.join("integrity.json"),
2057            serde_json::to_string_pretty(&manifest).unwrap(),
2058        )
2059        .unwrap();
2060
2061        let result = verify_bundle(&site_dir, false).unwrap();
2062        assert!(!result.checks.integrity.passed);
2063        let details = result.checks.integrity.details.as_ref().unwrap();
2064        assert!(
2065            details.contains("Size mismatch") || details.contains("Hash mismatch"),
2066            "expected size or hash mismatch, got: {details}"
2067        );
2068    }
2069
2070    #[test]
2071    fn test_resolve_site_dir() {
2072        let temp = TempDir::new().unwrap();
2073
2074        // Test with site/ subdirectory
2075        let site_dir = temp.path().join("site");
2076        fs::create_dir_all(&site_dir).unwrap();
2077
2078        let resolved = crate::pages::resolve_site_dir(temp.path()).unwrap();
2079        assert!(resolved.ends_with("site"));
2080
2081        // Test with direct path
2082        let resolved_direct = crate::pages::resolve_site_dir(&site_dir).unwrap();
2083        assert_eq!(resolved_direct, site_dir);
2084    }
2085
2086    #[test]
2087    #[cfg(unix)]
2088    fn test_resolve_site_dir_rejects_symlinked_site_directory() {
2089        use std::os::unix::fs::symlink;
2090
2091        let bundle_root = TempDir::new().unwrap();
2092        let outside = TempDir::new().unwrap();
2093        let outside_site = outside.path().join("site");
2094        fs::create_dir_all(&outside_site).unwrap();
2095        fs::write(outside_site.join("index.html"), "<html></html>").unwrap();
2096        symlink(&outside_site, bundle_root.path().join("site")).unwrap();
2097
2098        let err = crate::pages::resolve_site_dir(bundle_root.path())
2099            .unwrap_err()
2100            .to_string();
2101        assert!(err.contains("must not be a symlink"));
2102
2103        let direct_err = crate::pages::resolve_site_dir(&bundle_root.path().join("site"))
2104            .unwrap_err()
2105            .to_string();
2106        assert!(direct_err.contains("must not be a symlink"));
2107    }
2108
2109    #[test]
2110    fn test_chunk_size_limit() {
2111        let temp = TempDir::new().unwrap();
2112        let site_dir = temp.path();
2113        let payload_dir = site_dir.join("payload");
2114        fs::create_dir_all(&payload_dir).unwrap();
2115
2116        // Create config.json for encrypted archive (required by check_size_limits)
2117        let config = r#"{
2118          "version": 2,
2119          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2120          "base_nonce": "AAAAAAAAAAAAAAAA",
2121          "compression": "deflate",
2122          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2123          "payload": {
2124            "chunk_size": 1024,
2125            "chunk_count": 1,
2126            "total_compressed_size": 14,
2127            "total_plaintext_size": 100,
2128            "files": ["payload/chunk-00000.bin"]
2129          },
2130          "key_slots": [{
2131            "id": 0,
2132            "slot_type": "password",
2133            "kdf": "argon2id",
2134            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2135            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2136            "nonce": "AAAAAAAAAAAAAAAA",
2137            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2138          }]
2139        }"#;
2140        fs::write(site_dir.join("config.json"), config).unwrap();
2141
2142        // Create a small file (should pass)
2143        fs::write(payload_dir.join("chunk-00000.bin"), "small").unwrap();
2144
2145        let result = check_size_limits(site_dir);
2146        assert!(result.passed);
2147    }
2148
2149    #[test]
2150    fn test_payload_manifest_rejects_unexpected_high_chunk_index() {
2151        let temp = TempDir::new().unwrap();
2152        let site_dir = temp.path();
2153        let payload_dir = site_dir.join("payload");
2154        fs::create_dir_all(&payload_dir).unwrap();
2155
2156        let config = r#"{
2157          "version": 2,
2158          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2159          "base_nonce": "AAAAAAAAAAAAAAAA",
2160          "compression": "deflate",
2161          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2162          "payload": {
2163            "chunk_size": 1024,
2164            "chunk_count": 1,
2165            "total_compressed_size": 14,
2166            "total_plaintext_size": 100,
2167            "files": ["payload/chunk-00000.bin"]
2168          },
2169          "key_slots": [{
2170            "id": 0,
2171            "slot_type": "password",
2172            "kdf": "argon2id",
2173            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2174            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2175            "nonce": "AAAAAAAAAAAAAAAA",
2176            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2177          }]
2178        }"#;
2179        fs::write(site_dir.join("config.json"), config).unwrap();
2180
2181        fs::write(payload_dir.join("chunk-00000.bin"), "small").unwrap();
2182        fs::write(payload_dir.join("chunk-99999.bin"), "unexpected").unwrap();
2183
2184        let result = check_payload_manifest(site_dir);
2185        assert!(!result.passed);
2186        let details = result.details.unwrap_or_default();
2187        assert!(details.contains("Unexpected chunk file index: chunk-99999.bin"));
2188    }
2189
2190    #[test]
2191    fn test_payload_manifest_rejects_non_file_chunk_entry() {
2192        let temp = TempDir::new().unwrap();
2193        let site_dir = temp.path();
2194        let payload_dir = site_dir.join("payload");
2195        fs::create_dir_all(&payload_dir).unwrap();
2196
2197        let config = r#"{
2198          "version": 2,
2199          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2200          "base_nonce": "AAAAAAAAAAAAAAAA",
2201          "compression": "deflate",
2202          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2203          "payload": {
2204            "chunk_size": 1024,
2205            "chunk_count": 1,
2206            "total_compressed_size": 14,
2207            "total_plaintext_size": 100,
2208            "files": ["payload/chunk-00000.bin"]
2209          },
2210          "key_slots": [{
2211            "id": 0,
2212            "slot_type": "password",
2213            "kdf": "argon2id",
2214            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2215            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2216            "nonce": "AAAAAAAAAAAAAAAA",
2217            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2218          }]
2219        }"#;
2220        fs::write(site_dir.join("config.json"), config).unwrap();
2221        fs::create_dir_all(payload_dir.join("chunk-00000.bin")).unwrap();
2222
2223        let result = check_payload_manifest(site_dir);
2224        assert!(!result.passed);
2225        assert!(
2226            result
2227                .details
2228                .as_ref()
2229                .map(|d| d.contains("payload/chunk-00000.bin must be a regular file"))
2230                .unwrap_or(false)
2231        );
2232    }
2233
2234    #[test]
2235    fn test_payload_manifest_rejects_malformed_chunk_filename() {
2236        let temp = TempDir::new().unwrap();
2237        let site_dir = temp.path();
2238        let payload_dir = site_dir.join("payload");
2239        fs::create_dir_all(&payload_dir).unwrap();
2240
2241        let config = r#"{
2242          "version": 2,
2243          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2244          "base_nonce": "AAAAAAAAAAAAAAAA",
2245          "compression": "deflate",
2246          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2247          "payload": {
2248            "chunk_size": 1024,
2249            "chunk_count": 1,
2250            "total_compressed_size": 14,
2251            "total_plaintext_size": 100,
2252            "files": ["payload/chunk-00000.bin"]
2253          },
2254          "key_slots": [{
2255            "id": 0,
2256            "slot_type": "password",
2257            "kdf": "argon2id",
2258            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2259            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2260            "nonce": "AAAAAAAAAAAAAAAA",
2261            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2262          }]
2263        }"#;
2264        fs::write(site_dir.join("config.json"), config).unwrap();
2265        fs::write(payload_dir.join("chunk-00000.bin"), "small").unwrap();
2266        fs::write(payload_dir.join("chunk-1.bin"), "malformed").unwrap();
2267
2268        let result = check_payload_manifest(site_dir);
2269        assert!(!result.passed);
2270        assert!(
2271            result
2272                .details
2273                .as_ref()
2274                .map(|d| d.contains("Malformed chunk filename: chunk-1.bin"))
2275                .unwrap_or(false)
2276        );
2277    }
2278
2279    #[test]
2280    fn test_payload_manifest_treats_six_digit_chunk_name_as_unexpected_not_malformed() {
2281        let temp = TempDir::new().unwrap();
2282        let site_dir = temp.path();
2283        let payload_dir = site_dir.join("payload");
2284        fs::create_dir_all(&payload_dir).unwrap();
2285
2286        let config = r#"{
2287          "version": 2,
2288          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2289          "base_nonce": "AAAAAAAAAAAAAAAA",
2290          "compression": "deflate",
2291          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2292          "payload": {
2293            "chunk_size": 1024,
2294            "chunk_count": 1,
2295            "total_compressed_size": 14,
2296            "total_plaintext_size": 100,
2297            "files": ["payload/chunk-00000.bin"]
2298          },
2299          "key_slots": [{
2300            "id": 0,
2301            "slot_type": "password",
2302            "kdf": "argon2id",
2303            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2304            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2305            "nonce": "AAAAAAAAAAAAAAAA",
2306            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2307          }]
2308        }"#;
2309        fs::write(site_dir.join("config.json"), config).unwrap();
2310        fs::write(payload_dir.join("chunk-00000.bin"), "small").unwrap();
2311        fs::write(payload_dir.join("chunk-100000.bin"), "unexpected").unwrap();
2312
2313        let result = check_payload_manifest(site_dir);
2314        assert!(!result.passed);
2315        let details = result.details.unwrap_or_default();
2316        assert!(details.contains("Unexpected chunk file index: chunk-100000.bin"));
2317        assert!(!details.contains("Malformed chunk filename: chunk-100000.bin"));
2318    }
2319
2320    #[test]
2321    fn test_unencrypted_payload_must_be_regular_file() {
2322        let temp = TempDir::new().unwrap();
2323        let site_dir = temp.path();
2324        let payload_dir = site_dir.join("payload");
2325        fs::create_dir_all(&payload_dir).unwrap();
2326        fs::create_dir_all(payload_dir.join("data.sqlite")).unwrap();
2327
2328        let config = r#"{
2329          "encrypted": false,
2330          "version": "1.0",
2331          "payload": {
2332            "path": "payload/data.sqlite",
2333            "format": "sqlite"
2334          }
2335        }"#;
2336        fs::write(site_dir.join("config.json"), config).unwrap();
2337
2338        let manifest_result = check_payload_manifest(site_dir);
2339        assert!(!manifest_result.passed);
2340        assert!(
2341            manifest_result
2342                .details
2343                .as_ref()
2344                .map(|d| d.contains("payload/data.sqlite must be a regular file"))
2345                .unwrap_or(false)
2346        );
2347
2348        let size_result = check_size_limits(site_dir);
2349        assert!(!size_result.passed);
2350        assert!(
2351            size_result
2352                .details
2353                .as_ref()
2354                .map(|d| d.contains("payload/data.sqlite must be a regular file"))
2355                .unwrap_or(false)
2356        );
2357    }
2358
2359    #[test]
2360    fn test_size_limits_rejects_non_file_chunk_entry() {
2361        let temp = TempDir::new().unwrap();
2362        let site_dir = temp.path();
2363        let payload_dir = site_dir.join("payload");
2364        fs::create_dir_all(&payload_dir).unwrap();
2365
2366        let config = r#"{
2367          "version": 2,
2368          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2369          "base_nonce": "AAAAAAAAAAAAAAAA",
2370          "compression": "deflate",
2371          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2372          "payload": {
2373            "chunk_size": 1024,
2374            "chunk_count": 1,
2375            "total_compressed_size": 14,
2376            "total_plaintext_size": 100,
2377            "files": ["payload/chunk-00000.bin"]
2378          },
2379          "key_slots": [{
2380            "id": 0,
2381            "slot_type": "password",
2382            "kdf": "argon2id",
2383            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2384            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2385            "nonce": "AAAAAAAAAAAAAAAA",
2386            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2387          }]
2388        }"#;
2389        fs::write(site_dir.join("config.json"), config).unwrap();
2390        fs::create_dir_all(payload_dir.join("chunk-00000.bin")).unwrap();
2391
2392        let result = check_size_limits(site_dir);
2393        assert!(!result.passed);
2394        assert!(
2395            result
2396                .details
2397                .as_ref()
2398                .map(|d| d.contains("chunk-00000.bin must be a regular file"))
2399                .unwrap_or(false)
2400        );
2401    }
2402
2403    #[test]
2404    #[cfg(unix)]
2405    fn test_size_limits_rejects_symlinked_chunk() {
2406        use std::os::unix::fs::symlink;
2407
2408        let temp = TempDir::new().unwrap();
2409        let outside = TempDir::new().unwrap();
2410        let site_dir = temp.path();
2411        let payload_dir = site_dir.join("payload");
2412        fs::create_dir_all(&payload_dir).unwrap();
2413
2414        let config = r#"{
2415          "version": 2,
2416          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2417          "base_nonce": "AAAAAAAAAAAAAAAA",
2418          "compression": "deflate",
2419          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2420          "payload": {
2421            "chunk_size": 1024,
2422            "chunk_count": 1,
2423            "total_compressed_size": 14,
2424            "total_plaintext_size": 100,
2425            "files": ["payload/chunk-00000.bin"]
2426          },
2427          "key_slots": [{
2428            "id": 0,
2429            "slot_type": "password",
2430            "kdf": "argon2id",
2431            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2432            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2433            "nonce": "AAAAAAAAAAAAAAAA",
2434            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2435          }]
2436        }"#;
2437        fs::write(site_dir.join("config.json"), config).unwrap();
2438
2439        fs::write(outside.path().join("chunk-00000.bin"), "external").unwrap();
2440        symlink(
2441            outside.path().join("chunk-00000.bin"),
2442            payload_dir.join("chunk-00000.bin"),
2443        )
2444        .unwrap();
2445
2446        let result = check_size_limits(site_dir);
2447        assert!(!result.passed);
2448        assert!(
2449            result
2450                .details
2451                .as_ref()
2452                .map(|d| d.contains("must not be a symlink"))
2453                .unwrap_or(false)
2454        );
2455    }
2456
2457    #[test]
2458    fn test_integrity_path_traversal_blocked() {
2459        use std::collections::BTreeMap;
2460
2461        let temp = TempDir::new().unwrap();
2462        let site_dir = temp.path();
2463
2464        // Create integrity.json with path traversal attempt
2465        let mut files = BTreeMap::new();
2466        files.insert(
2467            "../../../etc/passwd".to_string(),
2468            crate::pages::bundle::IntegrityEntry {
2469                sha256: "deadbeef".repeat(8),
2470                size: 100,
2471            },
2472        );
2473        let manifest = IntegrityManifest {
2474            version: 1,
2475            generated_at: "2025-01-01T00:00:00Z".to_string(),
2476            files,
2477        };
2478        let manifest_json = serde_json::to_string(&manifest).unwrap();
2479        fs::write(site_dir.join("integrity.json"), manifest_json).unwrap();
2480
2481        // Verify the check catches the path traversal
2482        let result = check_integrity(site_dir, false);
2483        assert!(!result.passed, "Path traversal should be blocked");
2484        assert!(
2485            result
2486                .details
2487                .as_ref()
2488                .map(|d| d.contains("security violation"))
2489                .unwrap_or(false),
2490            "Should mention security violation"
2491        );
2492    }
2493
2494    #[test]
2495    fn test_integrity_absolute_path_blocked() {
2496        use std::collections::BTreeMap;
2497
2498        let temp = TempDir::new().unwrap();
2499        let site_dir = temp.path();
2500
2501        // Create integrity.json with absolute path
2502        let mut files = BTreeMap::new();
2503        files.insert(
2504            "/etc/passwd".to_string(),
2505            crate::pages::bundle::IntegrityEntry {
2506                sha256: "deadbeef".repeat(8),
2507                size: 100,
2508            },
2509        );
2510        let manifest = IntegrityManifest {
2511            version: 1,
2512            generated_at: "2025-01-01T00:00:00Z".to_string(),
2513            files,
2514        };
2515        let manifest_json = serde_json::to_string(&manifest).unwrap();
2516        fs::write(site_dir.join("integrity.json"), manifest_json).unwrap();
2517
2518        // Verify the check catches the absolute path
2519        let result = check_integrity(site_dir, false);
2520        assert!(!result.passed, "Absolute path should be blocked");
2521        assert!(
2522            result
2523                .details
2524                .as_ref()
2525                .map(|d| d.contains("security violation"))
2526                .unwrap_or(false),
2527            "Should mention security violation"
2528        );
2529    }
2530
2531    #[test]
2532    fn test_integrity_url_encoded_traversal_blocked_single() {
2533        assert_integrity_path_blocked("%2e%2e/%2e%2e/etc/passwd");
2534    }
2535
2536    #[test]
2537    fn test_integrity_url_encoded_traversal_blocked_double() {
2538        assert_integrity_path_blocked("%252e%252e/%252e%252e/etc/passwd");
2539    }
2540
2541    #[test]
2542    fn test_integrity_url_encoded_traversal_blocked_mixed() {
2543        assert_integrity_path_blocked("%2e./etc/passwd");
2544        assert_integrity_path_blocked(".%2e/etc/passwd");
2545        assert_integrity_path_blocked("..%2fetc/passwd");
2546    }
2547
2548    #[test]
2549    fn test_integrity_url_encoded_traversal_blocked_uppercase() {
2550        assert_integrity_path_blocked("%2E%2E/%2Fetc/passwd");
2551    }
2552
2553    #[test]
2554    fn test_integrity_url_encoded_traversal_blocked_overlong_utf8() {
2555        assert_integrity_path_blocked("%c0%ae%c0%ae/%c0%ae%c0%ae/etc/passwd");
2556    }
2557
2558    #[test]
2559    fn test_integrity_url_encoded_traversal_blocked_null_byte() {
2560        assert_integrity_path_blocked("valid%00/../etc/passwd");
2561    }
2562
2563    #[test]
2564    fn test_integrity_url_encoded_traversal_blocked_backslash() {
2565        assert_integrity_path_blocked("..\\..\\etc\\passwd");
2566        assert_integrity_path_blocked("..%5c..%5cetc%5cpasswd");
2567    }
2568
2569    #[test]
2570    fn test_integrity_url_encoded_traversal_blocked_separator_confusion() {
2571        assert_integrity_path_blocked(r"..\/..\/etc\/passwd");
2572    }
2573
2574    // --- Unicode normalization attack tests ---
2575
2576    #[test]
2577    fn test_integrity_unicode_fullwidth_dots_blocked() {
2578        // U+FF0E FULLWIDTH FULL STOP looks like '.' but is a different codepoint.
2579        // Two fullwidth dots form a visual ".." that bypasses naive ASCII checks.
2580        assert_integrity_path_blocked("\u{FF0E}\u{FF0E}/etc/passwd");
2581    }
2582
2583    #[test]
2584    fn test_integrity_unicode_fullwidth_slash_blocked() {
2585        // U+FF0F FULLWIDTH SOLIDUS looks like '/' but is a different codepoint.
2586        assert_integrity_path_blocked("payload\u{FF0F}..\\..\\etc\\passwd");
2587    }
2588
2589    #[test]
2590    fn test_integrity_unicode_fullwidth_backslash_blocked() {
2591        // U+FF3C FULLWIDTH REVERSE SOLIDUS looks like '\' but is a different codepoint.
2592        assert_integrity_path_blocked("payload\u{FF3C}..\\..\\etc\\passwd");
2593    }
2594
2595    #[test]
2596    fn test_integrity_unicode_small_full_stop_blocked() {
2597        // U+FE52 SMALL FULL STOP - a compatibility variant of '.'
2598        assert_integrity_path_blocked("\u{FE52}\u{FE52}/etc/passwd");
2599    }
2600
2601    #[test]
2602    fn test_integrity_unicode_one_dot_leader_blocked() {
2603        // U+2024 ONE DOT LEADER - looks nearly identical to '.'
2604        assert_integrity_path_blocked("\u{2024}\u{2024}/etc/passwd");
2605    }
2606
2607    #[test]
2608    fn test_integrity_unicode_halfwidth_ideographic_full_stop_blocked() {
2609        // U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP
2610        assert_integrity_path_blocked("\u{FF61}\u{FF61}/etc/passwd");
2611    }
2612
2613    #[test]
2614    fn test_integrity_unicode_mixed_fullwidth_and_ascii_blocked() {
2615        // Mix fullwidth and ASCII dots — the fullwidth char alone should trigger
2616        assert_integrity_path_blocked(".\u{FF0E}/etc/passwd");
2617        assert_integrity_path_blocked("\u{FF0E}./etc/passwd");
2618    }
2619
2620    #[test]
2621    fn test_integrity_percent_encoded_unicode_fullwidth_dot_blocked() {
2622        // Percent-encoded UTF-8 for U+FF0E (FULLWIDTH FULL STOP): 0xEF 0xBC 0x8E
2623        assert_integrity_path_blocked("%ef%bc%8e%ef%bc%8e/etc/passwd");
2624    }
2625
2626    // --- Case sensitivity / Windows path tests ---
2627
2628    #[test]
2629    fn test_integrity_windows_drive_letter_blocked() {
2630        assert_integrity_path_blocked("C:\\Windows\\System32\\config\\SAM");
2631    }
2632
2633    #[test]
2634    fn test_integrity_windows_drive_letter_lowercase_blocked() {
2635        assert_integrity_path_blocked("c:\\windows\\system32");
2636    }
2637
2638    #[test]
2639    fn test_integrity_windows_drive_letter_forward_slash_blocked() {
2640        assert_integrity_path_blocked("C:/Windows/System32");
2641    }
2642
2643    #[test]
2644    fn test_integrity_windows_unc_path_blocked() {
2645        // UNC paths start with \\ — should be caught as absolute
2646        assert_integrity_path_blocked("\\\\server\\share\\file.txt");
2647    }
2648
2649    // --- Symlink traversal tests ---
2650
2651    #[test]
2652    #[cfg(unix)]
2653    fn test_integrity_symlink_traversal_blocked() {
2654        use std::os::unix::fs::symlink;
2655
2656        let temp = TempDir::new().unwrap();
2657        let site_dir = temp.path();
2658
2659        // Create a target file outside the site directory
2660        let outside_dir = TempDir::new().unwrap();
2661        let secret_file = outside_dir.path().join("secret.txt");
2662        fs::write(&secret_file, "sensitive data").unwrap();
2663
2664        // Create a symlink inside the site directory that points outside
2665        let link_path = site_dir.join("evil_link.txt");
2666        symlink(&secret_file, &link_path).unwrap();
2667
2668        // Compute hash of the file the symlink points to
2669        let hash = compute_file_hash(&link_path).unwrap();
2670        let size = fs::metadata(&link_path).unwrap().len();
2671
2672        let mut files = BTreeMap::new();
2673        files.insert(
2674            "evil_link.txt".to_string(),
2675            IntegrityEntry { sha256: hash, size },
2676        );
2677        let manifest = IntegrityManifest {
2678            version: 1,
2679            generated_at: "2025-01-01T00:00:00Z".to_string(),
2680            files,
2681        };
2682        fs::write(
2683            site_dir.join("integrity.json"),
2684            serde_json::to_string(&manifest).unwrap(),
2685        )
2686        .unwrap();
2687
2688        // The canonicalize check should detect the symlink escapes site_dir
2689        let result = check_integrity(site_dir, false);
2690        assert!(
2691            !result.passed,
2692            "Symlink traversal outside site_dir should be blocked"
2693        );
2694        assert!(
2695            result
2696                .details
2697                .as_ref()
2698                .map(|d| d.contains("security violation"))
2699                .unwrap_or(false),
2700            "Should mention security violation for symlink escape"
2701        );
2702    }
2703
2704    #[test]
2705    #[cfg(unix)]
2706    fn test_integrity_symlink_within_site_dir_allowed() {
2707        use std::os::unix::fs::symlink;
2708
2709        let temp = TempDir::new().unwrap();
2710        let site_dir = temp.path();
2711
2712        // Create a real file inside site_dir
2713        let real_file = site_dir.join("real.txt");
2714        fs::write(&real_file, "legitimate data").unwrap();
2715
2716        // Create a symlink that points to a file inside site_dir
2717        let link_path = site_dir.join("link_to_real.txt");
2718        symlink(&real_file, &link_path).unwrap();
2719
2720        let hash = compute_file_hash(&link_path).unwrap();
2721        let size = fs::metadata(&link_path).unwrap().len();
2722
2723        let mut files = BTreeMap::new();
2724        files.insert(
2725            "link_to_real.txt".to_string(),
2726            IntegrityEntry { sha256: hash, size },
2727        );
2728        // Also include the real file and integrity.json in manifest
2729        let real_hash = compute_file_hash(&real_file).unwrap();
2730        let real_size = fs::metadata(&real_file).unwrap().len();
2731        files.insert(
2732            "real.txt".to_string(),
2733            IntegrityEntry {
2734                sha256: real_hash,
2735                size: real_size,
2736            },
2737        );
2738
2739        let manifest = IntegrityManifest {
2740            version: 1,
2741            generated_at: "2025-01-01T00:00:00Z".to_string(),
2742            files,
2743        };
2744        fs::write(
2745            site_dir.join("integrity.json"),
2746            serde_json::to_string(&manifest).unwrap(),
2747        )
2748        .unwrap();
2749
2750        // Symlink within site_dir should be OK
2751        let result = check_integrity(site_dir, false);
2752        assert!(
2753            result.passed,
2754            "Symlink within site_dir should be allowed: {:?}",
2755            result.details
2756        );
2757    }
2758
2759    // --- False positive tests: legitimate paths should NOT be blocked ---
2760
2761    #[test]
2762    fn test_integrity_legitimate_dotted_version_not_blocked() {
2763        // "v2.1.0" contains dots but they're version numbers, not traversal
2764        let temp = TempDir::new().unwrap();
2765        let site_dir = temp.path();
2766        let target = site_dir.join("assets/v2.1.0/bundle.js");
2767        fs::create_dir_all(target.parent().unwrap()).unwrap();
2768        fs::write(&target, "// bundle").unwrap();
2769
2770        let hash = compute_file_hash(&target).unwrap();
2771        let size = fs::metadata(&target).unwrap().len();
2772        let mut files = BTreeMap::new();
2773        files.insert(
2774            "assets/v2.1.0/bundle.js".to_string(),
2775            IntegrityEntry { sha256: hash, size },
2776        );
2777
2778        let manifest = IntegrityManifest {
2779            version: 1,
2780            generated_at: "2025-01-01T00:00:00Z".to_string(),
2781            files,
2782        };
2783        fs::write(
2784            site_dir.join("integrity.json"),
2785            serde_json::to_string(&manifest).unwrap(),
2786        )
2787        .unwrap();
2788
2789        let result = check_integrity(site_dir, false);
2790        assert!(
2791            result.passed,
2792            "Dotted version path should not be blocked: {:?}",
2793            result.details
2794        );
2795    }
2796
2797    #[test]
2798    fn test_integrity_legitimate_hidden_file_not_blocked() {
2799        // ".nojekyll" starts with a dot — should not be confused with traversal
2800        let temp = TempDir::new().unwrap();
2801        let site_dir = temp.path();
2802        let target = site_dir.join(".nojekyll");
2803        fs::write(&target, "").unwrap();
2804
2805        let hash = compute_file_hash(&target).unwrap();
2806        let size = fs::metadata(&target).unwrap().len();
2807        let mut files = BTreeMap::new();
2808        files.insert(
2809            ".nojekyll".to_string(),
2810            IntegrityEntry { sha256: hash, size },
2811        );
2812
2813        let manifest = IntegrityManifest {
2814            version: 1,
2815            generated_at: "2025-01-01T00:00:00Z".to_string(),
2816            files,
2817        };
2818        fs::write(
2819            site_dir.join("integrity.json"),
2820            serde_json::to_string(&manifest).unwrap(),
2821        )
2822        .unwrap();
2823
2824        let result = check_integrity(site_dir, false);
2825        assert!(
2826            result.passed,
2827            "Hidden file (.nojekyll) should not be blocked: {:?}",
2828            result.details
2829        );
2830    }
2831
2832    #[test]
2833    fn test_integrity_legitimate_payload_subdir_not_blocked() {
2834        let temp = TempDir::new().unwrap();
2835        let site_dir = temp.path();
2836        let target = site_dir.join("payload/data/sessions.db");
2837        fs::create_dir_all(target.parent().unwrap()).unwrap();
2838        fs::write(&target, "sqlite").unwrap();
2839
2840        let hash = compute_file_hash(&target).unwrap();
2841        let size = fs::metadata(&target).unwrap().len();
2842        let mut files = BTreeMap::new();
2843        files.insert(
2844            "payload/data/sessions.db".to_string(),
2845            IntegrityEntry { sha256: hash, size },
2846        );
2847
2848        let manifest = IntegrityManifest {
2849            version: 1,
2850            generated_at: "2025-01-01T00:00:00Z".to_string(),
2851            files,
2852        };
2853        fs::write(
2854            site_dir.join("integrity.json"),
2855            serde_json::to_string(&manifest).unwrap(),
2856        )
2857        .unwrap();
2858
2859        let result = check_integrity(site_dir, false);
2860        assert!(
2861            result.passed,
2862            "Legitimate payload subdirectory should not be blocked: {:?}",
2863            result.details
2864        );
2865    }
2866
2867    #[test]
2868    fn test_integrity_legitimate_hyphens_underscores_not_blocked() {
2869        let temp = TempDir::new().unwrap();
2870        let site_dir = temp.path();
2871        let target = site_dir.join("css/main-v2_final.css");
2872        fs::create_dir_all(target.parent().unwrap()).unwrap();
2873        fs::write(&target, "body{}").unwrap();
2874
2875        let hash = compute_file_hash(&target).unwrap();
2876        let size = fs::metadata(&target).unwrap().len();
2877        let mut files = BTreeMap::new();
2878        files.insert(
2879            "css/main-v2_final.css".to_string(),
2880            IntegrityEntry { sha256: hash, size },
2881        );
2882
2883        let manifest = IntegrityManifest {
2884            version: 1,
2885            generated_at: "2025-01-01T00:00:00Z".to_string(),
2886            files,
2887        };
2888        fs::write(
2889            site_dir.join("integrity.json"),
2890            serde_json::to_string(&manifest).unwrap(),
2891        )
2892        .unwrap();
2893
2894        let result = check_integrity(site_dir, false);
2895        assert!(
2896            result.passed,
2897            "Path with hyphens/underscores should not be blocked: {:?}",
2898            result.details
2899        );
2900    }
2901
2902    // --- Unit tests for helper functions ---
2903
2904    #[test]
2905    fn test_contains_unicode_path_attack_detects_fullwidth_period() {
2906        assert!(contains_unicode_path_attack("\u{FF0E}"));
2907        assert!(contains_unicode_path_attack("foo\u{FF0E}bar"));
2908    }
2909
2910    #[test]
2911    fn test_contains_unicode_path_attack_detects_fullwidth_solidus() {
2912        assert!(contains_unicode_path_attack("\u{FF0F}"));
2913    }
2914
2915    #[test]
2916    fn test_contains_unicode_path_attack_detects_fullwidth_reverse_solidus() {
2917        assert!(contains_unicode_path_attack("\u{FF3C}"));
2918    }
2919
2920    #[test]
2921    fn test_contains_unicode_path_attack_detects_small_full_stop() {
2922        assert!(contains_unicode_path_attack("\u{FE52}"));
2923    }
2924
2925    #[test]
2926    fn test_contains_unicode_path_attack_detects_one_dot_leader() {
2927        assert!(contains_unicode_path_attack("\u{2024}"));
2928    }
2929
2930    #[test]
2931    fn test_contains_unicode_path_attack_allows_ascii() {
2932        assert!(!contains_unicode_path_attack("payload/chunk-00000.bin"));
2933        assert!(!contains_unicode_path_attack("../etc/passwd")); // traversal, but ASCII
2934        assert!(!contains_unicode_path_attack(".nojekyll"));
2935    }
2936
2937    #[test]
2938    fn test_detect_encoded_path_violation_unicode_attack() {
2939        let result = detect_encoded_path_violation("\u{FF0E}\u{FF0E}/etc/passwd");
2940        assert_eq!(result, Some("unicode normalization attack".to_string()));
2941    }
2942
2943    #[test]
2944    fn test_detect_encoded_path_violation_percent_encoded_unicode() {
2945        // %EF%BC%8E = UTF-8 encoding of U+FF0E (FULLWIDTH FULL STOP)
2946        let result = detect_encoded_path_violation("%ef%bc%8e%ef%bc%8e/etc/passwd");
2947        assert_eq!(
2948            result,
2949            Some("url-encoded unicode normalization attack".to_string())
2950        );
2951    }
2952
2953    // --- Additional Unicode normalization attack tests (coding_agent_session_search-13za) ---
2954
2955    #[test]
2956    fn test_integrity_unicode_combining_long_solidus_overlay_blocked() {
2957        // U+0338 COMBINING LONG SOLIDUS OVERLAY - could visually disguise characters
2958        assert_integrity_path_blocked(".\u{0338}./etc/passwd");
2959    }
2960
2961    #[test]
2962    fn test_integrity_unicode_combining_short_solidus_overlay_blocked() {
2963        // U+0337 COMBINING SHORT SOLIDUS OVERLAY
2964        assert_integrity_path_blocked(".\u{0337}./etc/passwd");
2965    }
2966
2967    #[test]
2968    fn test_integrity_unicode_rtl_override_blocked() {
2969        // U+202E RIGHT-TO-LEFT OVERRIDE - can visually reverse path display
2970        // This could make "etc/passwd/../" appear as a safe path when it's actually traversal
2971        assert_integrity_path_blocked("etc/passwd/\u{202E}../");
2972    }
2973
2974    #[test]
2975    fn test_integrity_unicode_ltr_override_blocked() {
2976        // U+202D LEFT-TO-RIGHT OVERRIDE - directional override
2977        assert_integrity_path_blocked("\u{202D}../etc/passwd");
2978    }
2979
2980    #[test]
2981    fn test_integrity_unicode_rtl_embedding_blocked() {
2982        // U+202B RIGHT-TO-LEFT EMBEDDING
2983        assert_integrity_path_blocked("\u{202B}../etc/passwd");
2984    }
2985
2986    #[test]
2987    fn test_integrity_unicode_rtl_isolate_blocked() {
2988        // U+2067 RIGHT-TO-LEFT ISOLATE
2989        assert_integrity_path_blocked("\u{2067}../etc/passwd");
2990    }
2991
2992    #[test]
2993    fn test_integrity_unicode_zero_width_joiner_blocked() {
2994        // U+200D ZERO WIDTH JOINER - invisible character that could split tokens
2995        assert_integrity_path_blocked(".\u{200D}./etc/passwd");
2996    }
2997
2998    #[test]
2999    fn test_integrity_unicode_zero_width_non_joiner_blocked() {
3000        // U+200C ZERO WIDTH NON-JOINER
3001        assert_integrity_path_blocked(".\u{200C}./etc/passwd");
3002    }
3003
3004    #[test]
3005    fn test_integrity_unicode_zero_width_space_blocked() {
3006        // U+200B ZERO WIDTH SPACE - invisible character
3007        assert_integrity_path_blocked("..\u{200B}/etc/passwd");
3008    }
3009
3010    #[test]
3011    fn test_integrity_unicode_bom_blocked() {
3012        // U+FEFF BYTE ORDER MARK (ZERO WIDTH NO-BREAK SPACE)
3013        assert_integrity_path_blocked("\u{FEFF}../etc/passwd");
3014    }
3015
3016    #[test]
3017    fn test_integrity_unicode_fraction_slash_blocked() {
3018        // U+2044 FRACTION SLASH - visually similar to /
3019        assert_integrity_path_blocked("..\u{2044}etc\u{2044}passwd");
3020    }
3021
3022    #[test]
3023    fn test_integrity_unicode_division_slash_blocked() {
3024        // U+2215 DIVISION SLASH - visually similar to /
3025        assert_integrity_path_blocked("..\u{2215}etc\u{2215}passwd");
3026    }
3027
3028    #[test]
3029    fn test_integrity_unicode_big_solidus_blocked() {
3030        // U+29F8 BIG SOLIDUS - another slash look-alike
3031        assert_integrity_path_blocked("..\u{29F8}etc\u{29F8}passwd");
3032    }
3033
3034    #[test]
3035    fn test_integrity_unicode_vai_full_stop_blocked() {
3036        // U+A60E VAI FULL STOP - dot look-alike
3037        assert_integrity_path_blocked("\u{A60E}\u{A60E}/etc/passwd");
3038    }
3039
3040    #[test]
3041    fn test_integrity_unicode_syriac_full_stop_blocked() {
3042        // U+0701 SYRIAC SUPRALINEAR FULL STOP - dot look-alike
3043        assert_integrity_path_blocked("\u{0701}\u{0701}/etc/passwd");
3044    }
3045
3046    // --- NFD/NFC normalization form tests ---
3047
3048    #[test]
3049    fn test_integrity_unicode_nfd_decomposed_not_exploitable() {
3050        // NFD decomposition of certain characters could potentially be exploited
3051        // For example, some characters have canonical decompositions
3052        // This test verifies that legitimate paths with accented chars work
3053        let temp = TempDir::new().unwrap();
3054        let site_dir = temp.path();
3055
3056        // Create a file with an accented filename (NFC form - precomposed)
3057        let target = site_dir.join("café.txt");
3058        fs::write(&target, "coffee").unwrap();
3059
3060        let hash = compute_file_hash(&target).unwrap();
3061        let size = fs::metadata(&target).unwrap().len();
3062        let mut files = BTreeMap::new();
3063        files.insert(
3064            "café.txt".to_string(),
3065            IntegrityEntry { sha256: hash, size },
3066        );
3067
3068        let manifest = IntegrityManifest {
3069            version: 1,
3070            generated_at: "2025-01-01T00:00:00Z".to_string(),
3071            files,
3072        };
3073        fs::write(
3074            site_dir.join("integrity.json"),
3075            serde_json::to_string(&manifest).unwrap(),
3076        )
3077        .unwrap();
3078
3079        // Legitimate accented filenames should be allowed
3080        let result = check_integrity(site_dir, false);
3081        assert!(
3082            result.passed,
3083            "Legitimate accented filename should be allowed: {:?}",
3084            result.details
3085        );
3086    }
3087
3088    // --- Unit tests for extended helper functions ---
3089
3090    #[test]
3091    fn test_contains_unicode_path_attack_detects_combining_overlay() {
3092        assert!(contains_unicode_path_attack("\u{0338}")); // COMBINING LONG SOLIDUS OVERLAY
3093        assert!(contains_unicode_path_attack("\u{0337}")); // COMBINING SHORT SOLIDUS OVERLAY
3094    }
3095
3096    #[test]
3097    fn test_contains_unicode_path_attack_detects_zero_width() {
3098        assert!(contains_unicode_path_attack("\u{200D}")); // ZERO WIDTH JOINER
3099        assert!(contains_unicode_path_attack("\u{200C}")); // ZERO WIDTH NON-JOINER
3100        assert!(contains_unicode_path_attack("\u{200B}")); // ZERO WIDTH SPACE
3101        assert!(contains_unicode_path_attack("\u{FEFF}")); // BOM
3102    }
3103
3104    #[test]
3105    fn test_contains_unicode_path_attack_detects_rtl_overrides() {
3106        assert!(contains_unicode_path_attack("\u{202E}")); // RTL OVERRIDE
3107        assert!(contains_unicode_path_attack("\u{202D}")); // LTR OVERRIDE
3108        assert!(contains_unicode_path_attack("\u{202B}")); // RTL EMBEDDING
3109        assert!(contains_unicode_path_attack("\u{2067}")); // RTL ISOLATE
3110    }
3111
3112    #[test]
3113    fn test_contains_unicode_path_attack_detects_confusable_slashes() {
3114        assert!(contains_unicode_path_attack("\u{2044}")); // FRACTION SLASH
3115        assert!(contains_unicode_path_attack("\u{2215}")); // DIVISION SLASH
3116        assert!(contains_unicode_path_attack("\u{29F8}")); // BIG SOLIDUS
3117    }
3118
3119    #[test]
3120    fn test_contains_unicode_path_attack_detects_confusable_dots() {
3121        assert!(contains_unicode_path_attack("\u{A60E}")); // VAI FULL STOP
3122        assert!(contains_unicode_path_attack("\u{0701}")); // SYRIAC SUPRALINEAR FULL STOP
3123        assert!(contains_unicode_path_attack("\u{0702}")); // SYRIAC SUBLINEAR FULL STOP
3124    }
3125
3126    #[test]
3127    fn test_detect_encoded_path_violation_rtl_override() {
3128        let result = detect_encoded_path_violation("etc/passwd/\u{202E}../");
3129        assert_eq!(result, Some("unicode normalization attack".to_string()));
3130    }
3131
3132    #[test]
3133    fn test_detect_encoded_path_violation_zero_width_joiner() {
3134        let result = detect_encoded_path_violation(".\u{200D}./etc/passwd");
3135        assert_eq!(result, Some("unicode normalization attack".to_string()));
3136    }
3137
3138    #[test]
3139    fn test_detect_encoded_path_violation_fraction_slash() {
3140        let result = detect_encoded_path_violation("..\u{2044}etc\u{2044}passwd");
3141        assert_eq!(result, Some("unicode normalization attack".to_string()));
3142    }
3143}