Skip to main content

coding_agent_search/pages/
verify.rs

1//! Verify command for CI pipelines.
2//!
3//! Provides `cass pages --verify <PATH>` to validate an existing export bundle for CI/CD.
4//! The verifier confirms correct structure, config schema, payload integrity, and
5//! the absence of secrets in site/.
6
7use anyhow::{Context, Result};
8use base64::prelude::*;
9use serde::{Deserialize, Serialize};
10use serde_json::{Map, Value};
11use sha2::{Digest, Sha256};
12use std::collections::HashSet;
13use std::fs::{self, File};
14use std::io::{BufReader, Read};
15use std::path::Path;
16
17use super::archive_config::{ArchiveConfig, UnencryptedConfig};
18use super::bundle::IntegrityManifest;
19use super::encrypt::{EncryptionConfig, SCHEMA_VERSION};
20use std::fmt;
21
/// Largest size allowed for a single payload file (GitHub Pages hard limit).
const MAX_CHUNK_SIZE: u64 = 100 * 1024 * 1024; // 100 MiB

/// Upper bound accepted for the `chunk_size` value in config.json (32 MiB).
const MAX_CONFIG_CHUNK_SIZE: usize = 32 * 1024 * 1024;

/// Files that every site/ directory must contain.
const REQUIRED_FILES: &[&str] = &[
    "index.html",
    "config.json",
    "sw.js",
    "viewer.js",
    "auth.js",
    "styles.css",
    "robots.txt",
    ".nojekyll",
];

/// File names whose presence indicates secret leakage.
const SECRET_FILES: &[&str] = &[
    "recovery-secret.txt",
    "qr-code.png",
    "qr-code.svg",
    "master-key.json",
];

/// Directory names that should never appear inside site/.
const SECRET_DIRS: &[&str] = &["private"];

/// `(key, description)` pairs for config.json keys that indicate plaintext
/// secret leakage.
const FORBIDDEN_CONFIG_KEYS: &[(&str, &str)] = &[
    ("password", "password field"),
    ("secret", "secret field"),
    ("private_key", "private_key field"),
    ("master_key", "master_key field"),
    ("recovery_secret", "recovery_secret"),
];

/// Top-level keys allowed in an encrypted config.json; any other key is
/// reported as an unknown field.
const ENCRYPTED_CONFIG_KEYS: &[&str] = &[
    "version",
    "export_id",
    "base_nonce",
    "compression",
    "kdf_defaults",
    "payload",
    "key_slots",
];

/// Top-level keys allowed in an unencrypted config.json.
const UNENCRYPTED_CONFIG_KEYS: &[&str] = &["encrypted", "version", "payload", "warning"];

/// Keys allowed inside `payload` of an encrypted config.json.
const ENCRYPTED_PAYLOAD_KEYS: &[&str] = &[
    "chunk_size",
    "chunk_count",
    "total_compressed_size",
    "total_plaintext_size",
    "files",
];

/// Keys allowed inside `payload` of an unencrypted config.json.
const UNENCRYPTED_PAYLOAD_KEYS: &[&str] = &["path", "format", "size_bytes"];

/// Keys allowed inside `kdf_defaults` and each key slot's `argon2_params`.
const ARGON2_PARAM_KEYS: &[&str] = &["memory_kb", "iterations", "parallelism"];

/// Keys allowed inside each element of `key_slots`.
const KEY_SLOT_KEYS: &[&str] = &[
    "id",
    "slot_type",
    "kdf",
    "salt",
    "wrapped_dek",
    "nonce",
    "argon2_params",
];
88
89/// Verification result for a single check
90#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct CheckResult {
92    /// Whether the check passed
93    pub passed: bool,
94    /// Details about the check (empty if passed, error message if failed)
95    #[serde(skip_serializing_if = "Option::is_none")]
96    pub details: Option<String>,
97}
98
99impl CheckResult {
100    fn pass() -> Self {
101        Self {
102            passed: true,
103            details: None,
104        }
105    }
106
107    fn fail(details: impl Into<String>) -> Self {
108        Self {
109            passed: false,
110            details: Some(details.into()),
111        }
112    }
113}
114
115/// Summary of all verification checks
116#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct VerifyChecks {
118    pub required_files: CheckResult,
119    pub config_schema: CheckResult,
120    pub payload_manifest: CheckResult,
121    pub size_limits: CheckResult,
122    pub integrity: CheckResult,
123    pub no_secrets_in_site: CheckResult,
124}
125
126impl VerifyChecks {
127    /// Returns true if all checks passed
128    pub fn all_passed(&self) -> bool {
129        self.required_files.passed
130            && self.config_schema.passed
131            && self.payload_manifest.passed
132            && self.size_limits.passed
133            && self.integrity.passed
134            && self.no_secrets_in_site.passed
135    }
136}
137
138/// Complete verification result
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct VerifyResult {
141    /// Overall status: "valid" or "invalid"
142    pub status: String,
143    /// Individual check results
144    pub checks: VerifyChecks,
145    /// Warning messages (non-fatal issues)
146    pub warnings: Vec<String>,
147    /// Total site size in bytes
148    pub site_size_bytes: u64,
149}
150
151/// Verify a bundle export
152///
153/// # Arguments
154/// * `path` - Path to the export root (containing site/) or site/ directory itself
155/// * `verbose` - Whether to print detailed progress
156///
157/// # Returns
158/// `VerifyResult` with all check outcomes
159pub fn verify_bundle(path: &Path, verbose: bool) -> Result<VerifyResult> {
160    // Resolve to site/ directory
161    let site_dir = super::resolve_site_dir(path)?;
162
163    if verbose {
164        println!("Verifying bundle at: {}", site_dir.display());
165    }
166
167    let warnings = Vec::new();
168
169    // Check 1: Required files
170    if verbose {
171        println!("  Checking required files...");
172    }
173    let required_files = check_required_files(&site_dir);
174
175    // Check 2: Config schema (only if config.json exists)
176    if verbose {
177        println!("  Checking config.json schema...");
178    }
179    let config_schema = if site_dir.join("config.json").exists() {
180        check_config_schema(&site_dir)
181    } else {
182        CheckResult::fail("config.json not found")
183    };
184
185    // Check 3: Payload manifest
186    if verbose {
187        println!("  Checking payload manifest...");
188    }
189    let payload_manifest = check_payload_manifest(&site_dir);
190
191    // Check 4: Size limits
192    if verbose {
193        println!("  Checking size limits...");
194    }
195    let size_limits = check_size_limits(&site_dir);
196
197    // Check 5: Integrity (if integrity.json exists)
198    if verbose {
199        println!("  Checking integrity...");
200    }
201    let integrity = if site_dir.join("integrity.json").exists() {
202        check_integrity(&site_dir, verbose)
203    } else {
204        CheckResult::fail("integrity.json missing — bundle integrity cannot be verified")
205    };
206
207    // Check 6: No secrets in site/
208    if verbose {
209        println!("  Checking for secret leakage...");
210    }
211    let no_secrets_in_site = check_no_secrets(&site_dir);
212
213    // Calculate total site size
214    let site_size_bytes = calculate_dir_size(&site_dir)?;
215
216    let checks = VerifyChecks {
217        required_files,
218        config_schema,
219        payload_manifest,
220        size_limits,
221        integrity,
222        no_secrets_in_site,
223    };
224
225    let status = if checks.all_passed() {
226        "valid".to_string()
227    } else {
228        "invalid".to_string()
229    };
230
231    Ok(VerifyResult {
232        status,
233        checks,
234        warnings,
235        site_size_bytes,
236    })
237}
238
239/// Check that all required files exist
240fn check_required_files(site_dir: &Path) -> CheckResult {
241    let mut missing = Vec::new();
242    let mut invalid = Vec::new();
243
244    for file in REQUIRED_FILES {
245        let path = site_dir.join(file);
246        match fs::symlink_metadata(&path) {
247            Ok(metadata) => {
248                let file_type = metadata.file_type();
249                if file_type.is_file() {
250                    continue;
251                }
252                invalid.push(format!("{file} (must be a regular file)"));
253            }
254            Err(_) => missing.push(*file),
255        }
256    }
257
258    // Also check payload/ directory exists
259    if !site_dir.join("payload").is_dir() {
260        missing.push("payload/");
261    }
262
263    if missing.is_empty() && invalid.is_empty() {
264        CheckResult::pass()
265    } else {
266        let mut parts = Vec::new();
267        if !missing.is_empty() {
268            parts.push(format!("Missing files: {}", missing.join(", ")));
269        }
270        if !invalid.is_empty() {
271            parts.push(format!("Invalid required files: {}", invalid.join(", ")));
272        }
273        CheckResult::fail(parts.join("; "))
274    }
275}
276
277/// Check config.json schema validity
278fn check_config_schema(site_dir: &Path) -> CheckResult {
279    let config_path = site_dir.join("config.json");
280
281    let content = match fs::read_to_string(&config_path).context("Failed to read config.json") {
282        Ok(content) => content,
283        Err(e) => return CheckResult::fail(format!("Failed to read config.json: {}", e)),
284    };
285
286    let config_json: Value =
287        match serde_json::from_str(&content).context("Failed to parse JSON syntax") {
288            Ok(json) => json,
289            Err(e) => return CheckResult::fail(format!("Failed to parse config.json: {}", e)),
290        };
291
292    let unknown_field_errors = find_unknown_config_fields(&config_json);
293    if !unknown_field_errors.is_empty() {
294        return CheckResult::fail(unknown_field_errors.join("; "));
295    }
296
297    let config: ArchiveConfig = match serde_json::from_value(config_json) {
298        Ok(c) => c,
299        Err(e) => return CheckResult::fail(format!("Failed to parse config.json: {}", e)),
300    };
301
302    let errors = match &config {
303        ArchiveConfig::Encrypted(enc) => validate_encrypted_config(enc),
304        ArchiveConfig::Unencrypted(unenc) => validate_unencrypted_config(unenc),
305    };
306
307    if errors.is_empty() {
308        CheckResult::pass()
309    } else {
310        CheckResult::fail(errors.join("; "))
311    }
312}
313
314fn find_unknown_config_fields(value: &Value) -> Vec<String> {
315    let mut errors = Vec::new();
316    let Some(root) = value.as_object() else {
317        return errors;
318    };
319
320    if root.contains_key("encrypted") {
321        collect_unknown_fields(root, UNENCRYPTED_CONFIG_KEYS, "", &mut errors);
322        if let Some(payload) = root.get("payload").and_then(Value::as_object) {
323            collect_unknown_fields(payload, UNENCRYPTED_PAYLOAD_KEYS, "payload", &mut errors);
324        }
325    } else {
326        collect_unknown_fields(root, ENCRYPTED_CONFIG_KEYS, "", &mut errors);
327        if let Some(payload) = root.get("payload").and_then(Value::as_object) {
328            collect_unknown_fields(payload, ENCRYPTED_PAYLOAD_KEYS, "payload", &mut errors);
329        }
330        if let Some(params) = root.get("kdf_defaults").and_then(Value::as_object) {
331            collect_unknown_fields(params, ARGON2_PARAM_KEYS, "kdf_defaults", &mut errors);
332        }
333        if let Some(slots) = root.get("key_slots").and_then(Value::as_array) {
334            for (idx, slot) in slots.iter().enumerate() {
335                if let Some(slot_obj) = slot.as_object() {
336                    let slot_path = format!("key_slots[{idx}]");
337                    collect_unknown_fields(slot_obj, KEY_SLOT_KEYS, &slot_path, &mut errors);
338                    if let Some(params) = slot_obj.get("argon2_params").and_then(Value::as_object) {
339                        collect_unknown_fields(
340                            params,
341                            ARGON2_PARAM_KEYS,
342                            &format!("{slot_path}.argon2_params"),
343                            &mut errors,
344                        );
345                    }
346                }
347            }
348        }
349    }
350
351    errors
352}
353
354fn collect_unknown_fields(
355    object: &Map<String, Value>,
356    allowed_keys: &[&str],
357    current_path: &str,
358    errors: &mut Vec<String>,
359) {
360    for key in object.keys() {
361        if !allowed_keys.contains(&key.as_str()) {
362            let path = if current_path.is_empty() {
363                key.clone()
364            } else {
365                format!("{current_path}.{key}")
366            };
367            errors.push(format!("config.json contains unknown field: {path}"));
368        }
369    }
370}
371
372fn validate_encrypted_config(config: &EncryptionConfig) -> Vec<String> {
373    let mut errors = Vec::new();
374
375    if config.version != SCHEMA_VERSION {
376        errors.push(format!(
377            "version must be {}; got {}. The current encrypted pages format supports only schema version {}.",
378            SCHEMA_VERSION, config.version, SCHEMA_VERSION
379        ));
380    }
381
382    // Validate export_id (base64, 16 bytes)
383    match BASE64_STANDARD.decode(&config.export_id) {
384        Ok(bytes) if bytes.len() == 16 => {}
385        Ok(bytes) => errors.push(format!("export_id should be 16 bytes, got {}", bytes.len())),
386        Err(e) => errors.push(format!("export_id is not valid base64: {}", e)),
387    }
388
389    // Validate base_nonce (base64, 12 bytes)
390    match BASE64_STANDARD.decode(&config.base_nonce) {
391        Ok(bytes) if bytes.len() == 12 => {}
392        Ok(bytes) => errors.push(format!(
393            "base_nonce should be 12 bytes, got {}",
394            bytes.len()
395        )),
396        Err(e) => errors.push(format!("base_nonce is not valid base64: {}", e)),
397    }
398
399    // Validate compression. The current encrypted archive format always emits
400    // deflate chunks, and the Rust decryptor always inflates chunks as deflate.
401    if config.compression != "deflate" {
402        errors.push(format!(
403            "compression must be 'deflate'; got '{}'. The current encrypted pages format supports only deflate.",
404            config.compression
405        ));
406    }
407
408    // Validate chunk_size
409    if config.payload.chunk_size == 0 {
410        errors.push("chunk_size cannot be zero".to_string());
411    }
412    if config.payload.chunk_size > MAX_CONFIG_CHUNK_SIZE {
413        errors.push(format!(
414            "chunk_size {} exceeds maximum {}",
415            config.payload.chunk_size, MAX_CONFIG_CHUNK_SIZE
416        ));
417    }
418
419    // Empty encrypted exports are valid: a zero-byte input produces an empty
420    // file list and the decryptors concatenate zero chunks into an empty DB
421    // byte buffer.
422
423    // Validate files list matches chunk_count
424    if config.payload.files.len() != config.payload.chunk_count {
425        errors.push(format!(
426            "files list length ({}) doesn't match chunk_count ({})",
427            config.payload.files.len(),
428            config.payload.chunk_count
429        ));
430    }
431
432    // Validate payload file paths (relative, under payload/, no parent traversal)
433    for (i, file) in config.payload.files.iter().enumerate() {
434        let path = Path::new(file);
435        if path.is_absolute() {
436            errors.push(format!("payload.files[{}] must be relative", i));
437        }
438        if path
439            .components()
440            .any(|c| matches!(c, std::path::Component::ParentDir))
441        {
442            errors.push(format!("payload.files[{}] must not contain '..'", i));
443        }
444        if !path.starts_with("payload") {
445            errors.push(format!("payload.files[{}] must reside under payload/", i));
446        }
447    }
448
449    // Validate key_slots
450    if config.key_slots.is_empty() {
451        errors.push("key_slots cannot be empty".to_string());
452    }
453
454    for (i, slot) in config.key_slots.iter().enumerate() {
455        // Validate slot.salt is base64
456        if BASE64_STANDARD.decode(&slot.salt).is_err() {
457            errors.push(format!("key_slot[{}].salt is not valid base64", i));
458        }
459
460        // Validate slot.wrapped_dek is base64
461        if BASE64_STANDARD.decode(&slot.wrapped_dek).is_err() {
462            errors.push(format!("key_slot[{}].wrapped_dek is not valid base64", i));
463        }
464
465        // Validate slot.nonce is base64
466        if BASE64_STANDARD.decode(&slot.nonce).is_err() {
467            errors.push(format!("key_slot[{}].nonce is not valid base64", i));
468        }
469    }
470
471    errors
472}
473
474fn validate_unencrypted_config(config: &UnencryptedConfig) -> Vec<String> {
475    let mut errors = Vec::new();
476
477    if config.encrypted {
478        errors.push("unencrypted config must set encrypted=false".to_string());
479    }
480
481    if config.version.trim().is_empty() {
482        errors.push("version cannot be empty".to_string());
483    }
484
485    if config.payload.path.trim().is_empty() {
486        errors.push("payload.path cannot be empty".to_string());
487    } else {
488        let path = Path::new(&config.payload.path);
489        validate_payload_path(&mut errors, "payload.path", path);
490    }
491
492    let valid_formats = ["sqlite"];
493    if !valid_formats.contains(&config.payload.format.as_str()) {
494        errors.push(format!(
495            "payload.format should be one of {:?}, got '{}'",
496            valid_formats, config.payload.format
497        ));
498    }
499
500    errors
501}
502
/// Applies the payload path rules to `path`, pushing one message per broken
/// rule onto `errors`.
///
/// The rules, in reporting order: the path must be relative, must not contain
/// a `..` component, and must start with the `payload` component. `label`
/// prefixes every message. Returns `true` only when no rule was broken.
fn validate_payload_path(errors: &mut Vec<String>, label: &str, path: &Path) -> bool {
    let has_parent_component = path
        .components()
        .any(|part| part == std::path::Component::ParentDir);

    let rules = [
        (path.is_absolute(), "must be relative"),
        (has_parent_component, "must not contain '..'"),
        (!path.starts_with("payload"), "must reside under payload/"),
    ];

    let count_before = errors.len();
    errors.extend(
        rules
            .iter()
            .filter(|(violated, _)| *violated)
            .map(|(_, rule)| format!("{label} {rule}")),
    );
    errors.len() == count_before
}
522
523/// Check payload manifest validity
524fn check_payload_manifest(site_dir: &Path) -> CheckResult {
525    let config_path = site_dir.join("config.json");
526    let payload_dir = site_dir.join("payload");
527
528    if !payload_dir.exists() {
529        return CheckResult::fail("payload/ directory not found");
530    }
531
532    // Parse config for expected payload
533    let config: ArchiveConfig = match File::open(&config_path)
534        .and_then(|f| Ok(serde_json::from_reader(BufReader::new(f))?))
535    {
536        Ok(c) => c,
537        Err(_) => return CheckResult::fail("Could not parse config.json"),
538    };
539
540    let mut errors = Vec::new();
541
542    match &config {
543        ArchiveConfig::Encrypted(enc) => {
544            // Check each expected chunk file exists
545            for (i, expected_file) in enc.payload.files.iter().enumerate() {
546                // Security: Verify filename follows expected pattern first (defense-in-depth)
547                // This also implicitly prevents path traversal since valid patterns are "payload/chunk-NNNNN.bin"
548                let expected_name = format!("payload/chunk-{:05}.bin", i);
549                if *expected_file != expected_name {
550                    errors.push(format!(
551                        "Chunk {} has unexpected name: {} (expected {})",
552                        i, expected_file, expected_name
553                    ));
554                    // Skip existence check for malformed paths to prevent path traversal
555                    continue;
556                }
557
558                let chunk_path = site_dir.join(expected_file);
559                match fs::symlink_metadata(&chunk_path) {
560                    Ok(meta) => {
561                        let file_type = meta.file_type();
562                        if file_type.is_symlink() {
563                            errors.push(format!("{expected_file} must not be a symlink"));
564                        } else if !file_type.is_file() {
565                            errors.push(format!("{expected_file} must be a regular file"));
566                        }
567                    }
568                    Err(_) => errors.push(format!("Missing chunk file: {}", expected_file)),
569                }
570            }
571
572            // Inventory chunk files to detect malformed names and out-of-range indices.
573            match fs::read_dir(&payload_dir) {
574                Ok(entries) => {
575                    for entry in entries {
576                        let entry = match entry {
577                            Ok(entry) => entry,
578                            Err(err) => {
579                                errors
580                                    .push(format!("Failed to read payload directory entry: {err}"));
581                                continue;
582                            }
583                        };
584                        let name = entry.file_name();
585                        let name_str = name.to_string_lossy();
586                        if !name_str.starts_with("chunk-") || !name_str.ends_with(".bin") {
587                            continue;
588                        }
589
590                        let Some(num_str) = name_str
591                            .strip_prefix("chunk-")
592                            .and_then(|s| s.strip_suffix(".bin"))
593                        else {
594                            errors.push(format!("Malformed chunk filename: {name_str}"));
595                            continue;
596                        };
597
598                        if num_str.len() < 5 || !num_str.chars().all(|c| c.is_ascii_digit()) {
599                            errors.push(format!("Malformed chunk filename: {name_str}"));
600                            continue;
601                        }
602
603                        let idx = match num_str.parse::<usize>() {
604                            Ok(idx) => idx,
605                            Err(_) => {
606                                errors.push(format!("Malformed chunk filename: {name_str}"));
607                                continue;
608                            }
609                        };
610
611                        if idx >= enc.payload.files.len() {
612                            errors.push(format!("Unexpected chunk file index: chunk-{idx:05}.bin"));
613                        }
614                    }
615                }
616                Err(err) => errors.push(format!("Failed to read payload/ directory: {err}")),
617            }
618        }
619        ArchiveConfig::Unencrypted(unenc) => {
620            let rel_path = Path::new(&unenc.payload.path);
621            if validate_payload_path(&mut errors, "payload.path", rel_path) {
622                let payload_path = site_dir.join(rel_path);
623                match fs::symlink_metadata(&payload_path) {
624                    Ok(meta) => {
625                        let file_type = meta.file_type();
626                        if file_type.is_symlink() {
627                            errors.push(format!("{} must not be a symlink", unenc.payload.path));
628                        } else if !file_type.is_file() {
629                            errors.push(format!("{} must be a regular file", unenc.payload.path));
630                        }
631                    }
632                    Err(_) => errors.push(format!("Missing payload file: {}", unenc.payload.path)),
633                }
634            }
635        }
636    }
637
638    if errors.is_empty() {
639        CheckResult::pass()
640    } else {
641        CheckResult::fail(errors.join("; "))
642    }
643}
644
645/// Check size limits for chunk files
646fn check_size_limits(site_dir: &Path) -> CheckResult {
647    let mut errors = Vec::new();
648
649    let config_path = site_dir.join("config.json");
650    let config: ArchiveConfig = match File::open(&config_path)
651        .context("Failed to open config.json")
652        .and_then(|f| serde_json::from_reader(BufReader::new(f)).context("Failed to parse JSON"))
653    {
654        Ok(c) => c,
655        Err(e) => {
656            return CheckResult::fail(format!("Failed to parse config.json: {}", e));
657        }
658    };
659
660    match &config {
661        ArchiveConfig::Encrypted(_) => {
662            let payload_dir = site_dir.join("payload");
663            if !payload_dir.is_dir() {
664                errors.push("payload/ directory not found for size check".to_string());
665            } else {
666                match fs::read_dir(&payload_dir) {
667                    Ok(entries) => {
668                        for entry in entries {
669                            let entry = match entry {
670                                Ok(entry) => entry,
671                                Err(err) => {
672                                    errors.push(format!(
673                                        "Failed to read payload directory entry: {err}"
674                                    ));
675                                    continue;
676                                }
677                            };
678                            let path = entry.path();
679                            if path.extension().map(|e| e == "bin").unwrap_or(false) {
680                                match fs::symlink_metadata(&path) {
681                                    Ok(meta) => {
682                                        let file_type = meta.file_type();
683                                        if file_type.is_symlink() {
684                                            errors.push(format!(
685                                                "{} must not be a symlink",
686                                                path.file_name()
687                                                    .unwrap_or_default()
688                                                    .to_string_lossy()
689                                            ));
690                                            continue;
691                                        }
692                                        if !file_type.is_file() {
693                                            errors.push(format!(
694                                                "{} must be a regular file",
695                                                path.file_name()
696                                                    .unwrap_or_default()
697                                                    .to_string_lossy()
698                                            ));
699                                            continue;
700                                        }
701                                        if meta.len() > MAX_CHUNK_SIZE {
702                                            errors.push(format!(
703                                                "{} exceeds 100MB limit ({} bytes)",
704                                                path.file_name()
705                                                    .unwrap_or_default()
706                                                    .to_string_lossy(),
707                                                meta.len()
708                                            ));
709                                        }
710                                    }
711                                    Err(err) => errors.push(format!(
712                                        "failed to stat {}: {}",
713                                        path.file_name().unwrap_or_default().to_string_lossy(),
714                                        err
715                                    )),
716                                }
717                            }
718                        }
719                    }
720                    Err(err) => errors.push(format!("Failed to read payload/ directory: {err}")),
721                }
722            }
723        }
724        ArchiveConfig::Unencrypted(unenc) => {
725            let payload_path = Path::new(&unenc.payload.path);
726            if validate_payload_path(&mut errors, "payload.path", payload_path) {
727                let payload_path = site_dir.join(payload_path);
728                if !payload_path.exists() {
729                    errors.push(format!(
730                        "payload file not found for size check: {}",
731                        unenc.payload.path
732                    ));
733                } else {
734                    match fs::symlink_metadata(&payload_path) {
735                        Ok(meta) => {
736                            let file_type = meta.file_type();
737                            if file_type.is_symlink() {
738                                errors
739                                    .push(format!("{} must not be a symlink", unenc.payload.path));
740                            } else if !file_type.is_file() {
741                                errors
742                                    .push(format!("{} must be a regular file", unenc.payload.path));
743                            } else if meta.len() > MAX_CHUNK_SIZE {
744                                errors.push(format!(
745                                    "{} exceeds 100MB limit ({} bytes)",
746                                    unenc.payload.path,
747                                    meta.len()
748                                ));
749                            }
750                        }
751                        Err(err) => errors.push(format!(
752                            "failed to stat payload file {}: {}",
753                            unenc.payload.path, err
754                        )),
755                    }
756                }
757            }
758        }
759    }
760
761    if errors.is_empty() {
762        CheckResult::pass()
763    } else {
764        CheckResult::fail(errors.join("; "))
765    }
766}
767
768/// Check integrity.json hashes match file contents
769fn check_integrity(site_dir: &Path, verbose: bool) -> CheckResult {
770    let integrity_path = site_dir.join("integrity.json");
771
772    let manifest: IntegrityManifest = match File::open(&integrity_path)
773        .context("Failed to open integrity.json")
774        .and_then(|f| serde_json::from_reader(BufReader::new(f)).context("Failed to parse JSON"))
775    {
776        Ok(m) => m,
777        Err(e) => return CheckResult::fail(format!("Failed to parse integrity.json: {}", e)),
778    };
779
780    let mut errors = Vec::new();
781    let mut checked_files: HashSet<String> = HashSet::new();
782    let canonical_site = match site_dir.canonicalize() {
783        Ok(path) => path,
784        Err(e) => {
785            return CheckResult::fail(format!(
786                "Failed to resolve site directory for integrity checks: {}",
787                e
788            ));
789        }
790    };
791
792    // Verify each file in manifest
793    for (rel_path, entry) in &manifest.files {
794        checked_files.insert(rel_path.clone());
795
796        if let Some(reason) = detect_encoded_path_violation(rel_path) {
797            errors.push(format!(
798                "integrity.json contains {reason} (security violation): {}",
799                rel_path
800            ));
801            continue;
802        }
803
804        // Security: Validate path doesn't escape site_dir via traversal
805        let path = Path::new(rel_path);
806        if path.is_absolute() {
807            errors.push(format!(
808                "integrity.json contains absolute path (security violation): {}",
809                rel_path
810            ));
811            continue;
812        }
813        if path
814            .components()
815            .any(|c| matches!(c, std::path::Component::ParentDir))
816        {
817            errors.push(format!(
818                "integrity.json contains path traversal (security violation): {}",
819                rel_path
820            ));
821            continue;
822        }
823
824        let file_path = site_dir.join(rel_path);
825        let metadata = match fs::symlink_metadata(&file_path) {
826            Ok(meta) => meta,
827            Err(_) => {
828                errors.push(format!("File in manifest but missing: {}", rel_path));
829                continue;
830            }
831        };
832
833        let file_type = metadata.file_type();
834
835        if !file_type.is_file() && !file_type.is_symlink() {
836            errors.push(format!(
837                "integrity.json references non-file entry (security violation): {}",
838                rel_path
839            ));
840            continue;
841        }
842
843        // Resolve symlinks and ensure final target remains within site_dir.
844        let canonical_file = match file_path.canonicalize() {
845            Ok(path) => path,
846            Err(_) => {
847                errors.push(format!("File in manifest but missing: {}", rel_path));
848                continue;
849            }
850        };
851        if !canonical_file.starts_with(&canonical_site) {
852            errors.push(format!(
853                "integrity.json path escapes site directory (security violation): {}",
854                rel_path
855            ));
856            continue;
857        }
858
859        // For symlinks, only permit links to regular files within site_dir.
860        if file_type.is_symlink() {
861            match fs::metadata(&file_path) {
862                Ok(target_meta) if target_meta.file_type().is_file() => {}
863                Ok(_) => {
864                    errors.push(format!(
865                        "integrity.json symlink target is not a regular file (security violation): {}",
866                        rel_path
867                    ));
868                    continue;
869                }
870                Err(e) => {
871                    errors.push(format!(
872                        "Failed to resolve symlink target for {}: {}",
873                        rel_path, e
874                    ));
875                    continue;
876                }
877            }
878        }
879
880        // Fast-fail on size mismatch before the expensive SHA256 hash.
881        // Use the canonical path so symlinks resolve to the actual target size.
882        if let Ok(actual_meta) = fs::metadata(&canonical_file)
883            && actual_meta.len() != entry.size
884        {
885            errors.push(format!(
886                "Size mismatch for {}: expected {}, got {}",
887                rel_path,
888                entry.size,
889                actual_meta.len()
890            ));
891            continue;
892        }
893
894        // Compute hash
895        let computed_hash = match compute_file_hash(&file_path) {
896            Ok(h) => h,
897            Err(e) => {
898                errors.push(format!("Failed to hash {}: {}", rel_path, e));
899                continue;
900            }
901        };
902
903        if computed_hash != entry.sha256 {
904            errors.push(format!(
905                "Hash mismatch for {}: expected {}, got {}",
906                rel_path, entry.sha256, computed_hash
907            ));
908        } else if verbose {
909            println!("    ✓ {}", rel_path);
910        }
911    }
912
913    // Check for extra files not in manifest
914    let actual_files = match collect_all_files(site_dir) {
915        Ok(files) => files,
916        Err(e) => return CheckResult::fail(format!("Failed to enumerate files: {}", e)),
917    };
918    for file in actual_files {
919        // Skip integrity.json itself
920        if file == "integrity.json" {
921            continue;
922        }
923        if !checked_files.contains(&file) {
924            errors.push(format!("File not in manifest: {}", file));
925        }
926    }
927
928    if errors.is_empty() {
929        CheckResult::pass()
930    } else {
931        CheckResult::fail(errors.join("; "))
932    }
933}
934
/// Reasons a single percent-decoding pass over a path string can fail.
#[derive(Debug)]
enum PercentDecodeError {
    /// A `%` escape was truncated or its two following characters did not
    /// parse as a hexadecimal byte value.
    InvalidEncoding,
    /// The bytes produced by decoding do not form valid UTF-8.
    InvalidUtf8,
    /// The decoded byte sequence contains a NUL (`0x00`) byte.
    NullByte,
}
941
942impl fmt::Display for PercentDecodeError {
943    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
944        match self {
945            Self::InvalidEncoding => write!(f, "invalid percent-encoding"),
946            Self::InvalidUtf8 => write!(f, "invalid UTF-8 after percent-decoding"),
947            Self::NullByte => write!(f, "null byte in decoded path"),
948        }
949    }
950}
951
/// Result of one percent-decoding pass over a string.
struct DecodeOutcome {
    /// The text after the pass.
    decoded: String,
    /// `true` when at least one `%XX` escape was replaced during the pass.
    changed: bool,
}
956
957fn percent_decode_once(input: &str) -> Result<DecodeOutcome, PercentDecodeError> {
958    let bytes = input.as_bytes();
959    let mut out = Vec::with_capacity(bytes.len());
960    let mut i = 0usize;
961    let mut changed = false;
962
963    while i < bytes.len() {
964        if bytes[i] == b'%' {
965            if i + 2 >= bytes.len() {
966                return Err(PercentDecodeError::InvalidEncoding);
967            }
968            let hi = bytes[i + 1];
969            let lo = bytes[i + 2];
970            let hex = [hi, lo];
971            let hex_str =
972                std::str::from_utf8(&hex).map_err(|_| PercentDecodeError::InvalidEncoding)?;
973            let val =
974                u8::from_str_radix(hex_str, 16).map_err(|_| PercentDecodeError::InvalidEncoding)?;
975            out.push(val);
976            i += 3;
977            changed = true;
978            continue;
979        }
980        out.push(bytes[i]);
981        i += 1;
982    }
983
984    if out.contains(&0) {
985        return Err(PercentDecodeError::NullByte);
986    }
987
988    let decoded = String::from_utf8(out).map_err(|_| PercentDecodeError::InvalidUtf8)?;
989    Ok(DecodeOutcome { decoded, changed })
990}
991
/// Returns `true` when any segment of `input`, split on `/` or `\`, is
/// exactly `..`.
fn contains_path_traversal_like(input: &str) -> bool {
    let is_separator = |c: char| matches!(c, '/' | '\\');
    input.split(is_separator).any(|segment| segment == "..")
}
995
/// Returns `true` when `input` looks like an absolute path on any platform:
/// it begins with `/` or `\` (which also covers `//` and `\\` prefixes), or
/// with an ASCII letter followed by `:` (a Windows-style drive prefix).
fn is_absolute_like(input: &str) -> bool {
    // Every character of interest is ASCII, so inspecting raw bytes is exact.
    match input.as_bytes() {
        [b'/' | b'\\', ..] => true,
        [drive, b':', ..] => drive.is_ascii_alphabetic(),
        _ => false,
    }
}
1004
/// Reports whether `input` contains a Unicode character that can stand in
/// for, or disguise, the path-sensitive ASCII characters `.`, `/` and `\`.
///
/// Such characters could slip past text-based path checks on filesystems that
/// perform Unicode compatibility normalization (NFKC), or make a path render
/// differently from what it actually is.
fn contains_unicode_path_attack(input: &str) -> bool {
    input.chars().any(|ch| {
        matches!(
            ch,
            // Combining SHORT / LONG SOLIDUS OVERLAY (can visually disguise)
            '\u{0337}'..='\u{0338}'
            // SYRIAC SUPRALINEAR / SUBLINEAR FULL STOP
            | '\u{0701}'..='\u{0702}'
            // PHILIPPINE SINGLE PUNCTUATION (looks like /)
            | '\u{1735}'
            // ZERO WIDTH SPACE, NON-JOINER, JOINER (invisible, could split tokens)
            | '\u{200B}'..='\u{200D}'
            // ONE DOT LEADER → .
            | '\u{2024}'
            // LRE, RLE, PDF, LRO, RLO (can visually reverse path display)
            | '\u{202A}'..='\u{202E}'
            // FRACTION SLASH (visually similar to /)
            | '\u{2044}'
            // LRI, RLI, FSI, PDI directional isolates
            | '\u{2066}'..='\u{2069}'
            // DIVISION SLASH (visually similar to /)
            | '\u{2215}'
            // BIG SOLIDUS
            | '\u{29F8}'
            // REVERSED QUESTION MARK (can look like period in some fonts)
            | '\u{2E2E}'
            // VAI FULL STOP
            | '\u{A60E}'
            // SMALL FULL STOP → .
            | '\u{FE52}'
            // BYTE ORDER MARK / ZERO WIDTH NO-BREAK SPACE
            | '\u{FEFF}'
            // FULLWIDTH FULL STOP → . and FULLWIDTH SOLIDUS → /
            | '\u{FF0E}'..='\u{FF0F}'
            // FULLWIDTH REVERSE SOLIDUS → \
            | '\u{FF3C}'
            // HALFWIDTH IDEOGRAPHIC FULL STOP
            | '\u{FF61}'
            // KHAROSHTHI PUNCTUATION DOT
            | '\u{10A50}'
        )
    })
}
1056
1057fn detect_encoded_path_violation(rel_path: &str) -> Option<String> {
1058    if contains_path_traversal_like(rel_path) {
1059        return Some("path traversal".to_string());
1060    }
1061    if is_absolute_like(rel_path) {
1062        return Some("absolute path".to_string());
1063    }
1064    if contains_unicode_path_attack(rel_path) {
1065        return Some("unicode normalization attack".to_string());
1066    }
1067
1068    if !rel_path.contains('%') {
1069        return None;
1070    }
1071
1072    let mut current = rel_path.to_string();
1073    for _ in 0..3 {
1074        let outcome = match percent_decode_once(&current) {
1075            Ok(o) => o,
1076            Err(e) => return Some(e.to_string()),
1077        };
1078        if !outcome.changed {
1079            break;
1080        }
1081        current = outcome.decoded;
1082        if contains_path_traversal_like(&current) {
1083            return Some("url-encoded path traversal".to_string());
1084        }
1085        if is_absolute_like(&current) {
1086            return Some("url-encoded absolute path".to_string());
1087        }
1088        if contains_unicode_path_attack(&current) {
1089            return Some("url-encoded unicode normalization attack".to_string());
1090        }
1091        if !current.contains('%') {
1092            break;
1093        }
1094    }
1095
1096    None
1097}
1098
1099/// Check for secret leakage in site/
1100fn check_no_secrets(site_dir: &Path) -> CheckResult {
1101    let mut errors = Vec::new();
1102
1103    // Check for forbidden files
1104    for file in SECRET_FILES {
1105        let path = site_dir.join(file);
1106        if fs::symlink_metadata(&path).is_ok() {
1107            errors.push(format!("Secret file found in site/: {}", file));
1108        }
1109    }
1110
1111    // Check for forbidden directories
1112    for dir in SECRET_DIRS {
1113        let path = site_dir.join(dir);
1114        if let Ok(metadata) = fs::symlink_metadata(&path) {
1115            let file_type = metadata.file_type();
1116            if file_type.is_dir() || file_type.is_symlink() {
1117                errors.push(format!("Secret directory found in site/: {}/", dir));
1118            }
1119        }
1120    }
1121
1122    // Recursive scan: detect secret files/dirs hidden in subdirectories
1123    find_secrets_recursive(site_dir, site_dir, &mut errors);
1124
1125    // Check config.json doesn't contain plaintext secrets.
1126    // Walk the parsed JSON tree instead of doing brittle raw substring checks so
1127    // formatting changes like `"secret" : "..."` or nested objects can't hide leakage.
1128    let config_path = site_dir.join("config.json");
1129    if config_path.exists()
1130        && let Ok(content) = fs::read_to_string(&config_path)
1131        && let Ok(config_json) = serde_json::from_str::<Value>(&content)
1132    {
1133        find_forbidden_config_keys(&config_json, "", &mut errors);
1134    }
1135
1136    if errors.is_empty() {
1137        CheckResult::pass()
1138    } else {
1139        CheckResult::fail(errors.join("; "))
1140    }
1141}
1142
1143fn find_forbidden_config_keys(value: &Value, current_path: &str, findings: &mut Vec<String>) {
1144    match value {
1145        Value::Object(map) => {
1146            for (key, child) in map {
1147                let child_path = if current_path.is_empty() {
1148                    key.clone()
1149                } else {
1150                    format!("{current_path}.{key}")
1151                };
1152                if let Some((_, description)) = FORBIDDEN_CONFIG_KEYS
1153                    .iter()
1154                    .find(|(forbidden, _)| key.eq_ignore_ascii_case(forbidden))
1155                {
1156                    findings.push(format!(
1157                        "config.json contains forbidden field: {} at {}",
1158                        description, child_path
1159                    ));
1160                }
1161                find_forbidden_config_keys(child, &child_path, findings);
1162            }
1163        }
1164        Value::Array(items) => {
1165            for (idx, child) in items.iter().enumerate() {
1166                let child_path = if current_path.is_empty() {
1167                    format!("[{idx}]")
1168                } else {
1169                    format!("{current_path}[{idx}]")
1170                };
1171                find_forbidden_config_keys(child, &child_path, findings);
1172            }
1173        }
1174        _ => {}
1175    }
1176}
1177
1178/// Recursively scan a directory tree for secret files and directories.
1179/// Finds entries whose name (not full path) matches SECRET_FILES or SECRET_DIRS
1180/// at any depth, catching secrets hidden in subdirectories.
1181fn find_secrets_recursive(base: &Path, current: &Path, findings: &mut Vec<String>) {
1182    let entries = match fs::read_dir(current) {
1183        Ok(entries) => entries,
1184        Err(_) => return,
1185    };
1186
1187    for entry in entries.flatten() {
1188        let path = entry.path();
1189        let file_type = match entry.file_type() {
1190            Ok(file_type) => file_type,
1191            Err(_) => continue,
1192        };
1193        let name = match entry.file_name().to_str() {
1194            Some(n) => n.to_string(),
1195            None => continue,
1196        };
1197        let is_secret_file = SECRET_FILES.contains(&name.as_str());
1198        let is_secret_dir = SECRET_DIRS.contains(&name.as_str());
1199
1200        let rel_path = path
1201            .strip_prefix(base)
1202            .unwrap_or(&path)
1203            .to_string_lossy()
1204            .replace('\\', "/");
1205
1206        if file_type.is_dir() {
1207            if is_secret_dir {
1208                // Skip if this is a top-level match (already caught above)
1209                if current != base {
1210                    findings.push(format!(
1211                        "Secret directory found in site subdirectory: {}/",
1212                        rel_path
1213                    ));
1214                }
1215            }
1216            // Only recurse into real directories. Symlinked directories are handled below
1217            // so a malicious or accidental loop cannot drag verification outside site/.
1218            find_secrets_recursive(base, &path, findings);
1219        } else if file_type.is_symlink() {
1220            if is_secret_dir {
1221                if current != base {
1222                    findings.push(format!(
1223                        "Secret directory found in site subdirectory: {}/",
1224                        rel_path
1225                    ));
1226                }
1227            } else if is_secret_file && current != base {
1228                findings.push(format!(
1229                    "Secret file found in site subdirectory: {}",
1230                    rel_path
1231                ));
1232            }
1233        } else if file_type.is_file() && is_secret_file {
1234            // Skip if this is a top-level match (already caught above)
1235            if current != base {
1236                findings.push(format!(
1237                    "Secret file found in site subdirectory: {}",
1238                    rel_path
1239                ));
1240            }
1241        }
1242    }
1243}
1244
1245/// Compute SHA256 hash of a file
1246fn compute_file_hash(path: &Path) -> Result<String> {
1247    let file = File::open(path)?;
1248    let mut reader = BufReader::new(file);
1249    let mut hasher = Sha256::new();
1250    let mut buffer = [0u8; 8192];
1251
1252    loop {
1253        let bytes_read = reader.read(&mut buffer)?;
1254        if bytes_read == 0 {
1255            break;
1256        }
1257        hasher.update(&buffer[..bytes_read]);
1258    }
1259
1260    // sha2 ≥ 0.11 dropped `LowerHex` on the digest output;
1261    // `hex::encode` produces the same lowercase-hex representation.
1262    Ok(hex::encode(hasher.finalize()))
1263}
1264
1265/// Collect all files in a directory recursively
1266fn collect_all_files(dir: &Path) -> Result<Vec<String>> {
1267    let mut files = Vec::new();
1268    collect_files_recursive(dir, dir, &mut files)?;
1269    Ok(files)
1270}
1271
1272fn collect_files_recursive(base: &Path, current: &Path, files: &mut Vec<String>) -> Result<()> {
1273    for entry in fs::read_dir(current)? {
1274        let entry = entry?;
1275        let path = entry.path();
1276        let metadata = fs::symlink_metadata(&path)?;
1277        let file_type = metadata.file_type();
1278
1279        if file_type.is_symlink() {
1280            if let Ok(rel) = path.strip_prefix(base) {
1281                files.push(rel.to_string_lossy().replace('\\', "/"));
1282            }
1283            continue;
1284        }
1285
1286        if file_type.is_dir() {
1287            collect_files_recursive(base, &path, files)?;
1288        } else if file_type.is_file()
1289            && let Ok(rel) = path.strip_prefix(base)
1290        {
1291            files.push(rel.to_string_lossy().replace('\\', "/"));
1292        }
1293    }
1294    Ok(())
1295}
1296
1297/// Calculate total size of a directory
1298fn calculate_dir_size(dir: &Path) -> Result<u64> {
1299    let mut total = 0u64;
1300
1301    fn calc_recursive(path: &Path, total: &mut u64) -> Result<()> {
1302        let metadata = fs::symlink_metadata(path)?;
1303        let file_type = metadata.file_type();
1304
1305        if file_type.is_symlink() {
1306            return Ok(());
1307        }
1308
1309        if file_type.is_dir() {
1310            for entry in fs::read_dir(path)? {
1311                calc_recursive(&entry?.path(), total)?;
1312            }
1313        } else if file_type.is_file() {
1314            *total += metadata.len();
1315        }
1316        Ok(())
1317    }
1318
1319    calc_recursive(dir, &mut total)?;
1320    Ok(total)
1321}
1322
1323/// Print verification result in human-readable format
1324pub fn print_result(result: &VerifyResult, verbose: bool) {
1325    let status_icon = if result.status == "valid" {
1326        "✓"
1327    } else {
1328        "✗"
1329    };
1330    println!(
1331        "\n{} Bundle status: {}",
1332        status_icon,
1333        result.status.to_uppercase()
1334    );
1335
1336    println!("\nChecks:");
1337    print_check("  Required files", &result.checks.required_files, verbose);
1338    print_check("  Config schema", &result.checks.config_schema, verbose);
1339    print_check(
1340        "  Payload manifest",
1341        &result.checks.payload_manifest,
1342        verbose,
1343    );
1344    print_check("  Size limits", &result.checks.size_limits, verbose);
1345    print_check("  Integrity", &result.checks.integrity, verbose);
1346    print_check("  No secrets", &result.checks.no_secrets_in_site, verbose);
1347
1348    if !result.warnings.is_empty() {
1349        println!("\nWarnings:");
1350        for warning in &result.warnings {
1351            println!("  ⚠ {}", warning);
1352        }
1353    }
1354
1355    println!(
1356        "\nTotal site size: {} bytes ({:.2} MB)",
1357        result.site_size_bytes,
1358        result.site_size_bytes as f64 / (1024.0 * 1024.0)
1359    );
1360}
1361
1362fn print_check(name: &str, result: &CheckResult, verbose: bool) {
1363    let icon = if result.passed { "✓" } else { "✗" };
1364    print!("{}: {} ", name, icon);
1365
1366    if result.passed {
1367        println!("OK");
1368    } else if let Some(details) = &result.details {
1369        if verbose {
1370            println!("FAILED");
1371            println!("      {}", details);
1372        } else {
1373            // Truncate long error messages (char-safe slicing)
1374            let display = if details.chars().count() > 60 {
1375                let truncated: String = details.chars().take(60).collect();
1376                format!("{truncated}...")
1377            } else {
1378                details.clone()
1379            };
1380            println!("FAILED: {}", display);
1381        }
1382    } else {
1383        println!("FAILED");
1384    }
1385}
1386
1387#[cfg(test)]
1388mod tests {
1389    use super::*;
1390    use crate::pages::bundle::IntegrityEntry;
1391    use std::collections::BTreeMap;
1392    use std::path::PathBuf;
1393    use tempfile::TempDir;
1394
1395    /// Path to the pages_verify fixtures directory
1396    fn fixtures_dir() -> PathBuf {
1397        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/pages_verify")
1398    }
1399
1400    /// Copy a fixture directory to the destination.
1401    /// `fixture_name` is the subdirectory under tests/fixtures/pages_verify/ (e.g., "valid", "unencrypted")
1402    fn copy_fixture(fixture_name: &str, dest: &Path) -> Result<()> {
1403        let src = fixtures_dir().join(fixture_name).join("site");
1404        copy_dir_recursive(&src, dest)
1405    }
1406
1407    /// Recursively copy a directory and its contents
1408    fn copy_dir_recursive(src: &Path, dest: &Path) -> Result<()> {
1409        if !dest.exists() {
1410            fs::create_dir_all(dest)?;
1411        }
1412        for entry in fs::read_dir(src)? {
1413            let entry = entry?;
1414            let file_type = entry.file_type()?;
1415            let dest_path = dest.join(entry.file_name());
1416            if file_type.is_dir() {
1417                copy_dir_recursive(&entry.path(), &dest_path)?;
1418            } else {
1419                fs::copy(entry.path(), &dest_path)?;
1420            }
1421        }
1422        Ok(())
1423    }
1424
1425    fn assert_integrity_path_blocked(rel_path: &str) {
1426        let temp = TempDir::new().unwrap();
1427        let site_dir = temp.path();
1428
1429        let mut files = BTreeMap::new();
1430        files.insert(
1431            rel_path.to_string(),
1432            IntegrityEntry {
1433                sha256: "deadbeef".repeat(8),
1434                size: 100,
1435            },
1436        );
1437        let manifest = IntegrityManifest {
1438            version: 1,
1439            generated_at: "2025-01-01T00:00:00Z".to_string(),
1440            files,
1441        };
1442        let manifest_json = serde_json::to_string(&manifest).unwrap();
1443        fs::write(site_dir.join("integrity.json"), manifest_json).unwrap();
1444
1445        let result = check_integrity(site_dir, false);
1446        assert!(!result.passed, "Path should be blocked: {rel_path}");
1447        assert!(
1448            result
1449                .details
1450                .as_ref()
1451                .map(|d| d.contains("security violation"))
1452                .unwrap_or(false),
1453            "Should mention security violation"
1454        );
1455    }
1456
1457    #[test]
1458    #[cfg(unix)]
1459    fn test_collect_all_files_lists_symlink_without_recursing() {
1460        use std::os::unix::fs::symlink;
1461
1462        let temp = TempDir::new().unwrap();
1463        let outside = TempDir::new().unwrap();
1464
1465        fs::write(temp.path().join("root.txt"), "root").unwrap();
1466        fs::create_dir_all(outside.path().join("nested")).unwrap();
1467        fs::write(outside.path().join("nested/hidden.txt"), "hidden").unwrap();
1468        symlink(
1469            outside.path().join("nested"),
1470            temp.path().join("linked-dir"),
1471        )
1472        .unwrap();
1473
1474        let files = collect_all_files(temp.path()).unwrap();
1475        assert!(files.contains(&"root.txt".to_string()));
1476        assert!(files.contains(&"linked-dir".to_string()));
1477        assert!(!files.iter().any(|f| f.starts_with("linked-dir/")));
1478    }
1479
1480    #[test]
1481    #[cfg(unix)]
1482    fn test_calculate_dir_size_skips_symlink_targets() {
1483        use std::os::unix::fs::symlink;
1484
1485        let temp = TempDir::new().unwrap();
1486        let outside = TempDir::new().unwrap();
1487
1488        fs::write(temp.path().join("small.txt"), vec![0u8; 8]).unwrap();
1489        fs::write(outside.path().join("large.bin"), vec![0u8; 8192]).unwrap();
1490        symlink(
1491            outside.path().join("large.bin"),
1492            temp.path().join("linked.bin"),
1493        )
1494        .unwrap();
1495
1496        let size = calculate_dir_size(temp.path()).unwrap();
1497        assert_eq!(size, 8);
1498    }
1499
1500    #[test]
1501    #[cfg(unix)]
1502    fn test_integrity_rejects_symlink_manifest_entry_to_directory() {
1503        use std::os::unix::fs::symlink;
1504
1505        let temp = TempDir::new().unwrap();
1506        let site_dir = temp.path();
1507        fs::create_dir_all(site_dir.join("payload/real-dir")).unwrap();
1508        fs::write(site_dir.join("payload/real-dir/content.txt"), b"payload").unwrap();
1509        symlink(
1510            site_dir.join("payload/real-dir"),
1511            site_dir.join("payload/alias-dir"),
1512        )
1513        .unwrap();
1514
1515        let mut files = BTreeMap::new();
1516        files.insert(
1517            "payload/alias-dir".to_string(),
1518            IntegrityEntry {
1519                // Hash/size are irrelevant here; verification should fail before hashing.
1520                sha256: "deadbeef".repeat(8),
1521                size: 0,
1522            },
1523        );
1524        let manifest = IntegrityManifest {
1525            version: 1,
1526            generated_at: "2025-01-01T00:00:00Z".to_string(),
1527            files,
1528        };
1529        fs::write(
1530            site_dir.join("integrity.json"),
1531            serde_json::to_string(&manifest).unwrap(),
1532        )
1533        .unwrap();
1534
1535        let result = check_integrity(site_dir, false);
1536        assert!(!result.passed);
1537        assert!(
1538            result
1539                .details
1540                .as_ref()
1541                .map(|d| d.contains("not a regular file"))
1542                .unwrap_or(false)
1543        );
1544    }
1545
1546    #[test]
1547    fn test_verify_minimal_valid_site() {
1548        let temp = TempDir::new().unwrap();
1549        let site_dir = temp.path().join("site");
1550
1551        // Copy the valid fixture to temp directory
1552        copy_fixture("valid", &site_dir).unwrap();
1553
1554        let result = verify_bundle(&site_dir, true).unwrap();
1555
1556        // Debug: print which checks failed
1557        if !result.checks.required_files.passed {
1558            eprintln!(
1559                "FAILED: required_files - {:?}",
1560                result.checks.required_files.details
1561            );
1562        }
1563        if !result.checks.config_schema.passed {
1564            eprintln!(
1565                "FAILED: config_schema - {:?}",
1566                result.checks.config_schema.details
1567            );
1568        }
1569        if !result.checks.payload_manifest.passed {
1570            eprintln!(
1571                "FAILED: payload_manifest - {:?}",
1572                result.checks.payload_manifest.details
1573            );
1574        }
1575        if !result.checks.size_limits.passed {
1576            eprintln!(
1577                "FAILED: size_limits - {:?}",
1578                result.checks.size_limits.details
1579            );
1580        }
1581        if !result.checks.integrity.passed {
1582            eprintln!("FAILED: integrity - {:?}", result.checks.integrity.details);
1583        }
1584        if !result.checks.no_secrets_in_site.passed {
1585            eprintln!(
1586                "FAILED: no_secrets_in_site - {:?}",
1587                result.checks.no_secrets_in_site.details
1588            );
1589        }
1590
1591        assert_eq!(result.status, "valid");
1592        assert!(result.checks.required_files.passed);
1593        assert!(result.checks.config_schema.passed);
1594    }
1595
1596    #[test]
1597    fn test_config_schema_allows_zero_chunk_encrypted_archive() {
1598        let temp = TempDir::new().unwrap();
1599        let site_dir = temp.path().join("site");
1600        fs::create_dir_all(&site_dir).unwrap();
1601
1602        let config = r#"{
1603          "version": 2,
1604          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
1605          "base_nonce": "AAAAAAAAAAAAAAAA",
1606          "compression": "deflate",
1607          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
1608          "payload": {
1609            "chunk_size": 1024,
1610            "chunk_count": 0,
1611            "total_compressed_size": 0,
1612            "total_plaintext_size": 0,
1613            "files": []
1614          },
1615          "key_slots": [{
1616            "id": 0,
1617            "slot_type": "password",
1618            "kdf": "argon2id",
1619            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
1620            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
1621            "nonce": "AAAAAAAAAAAAAAAA",
1622            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
1623          }]
1624        }"#;
1625        fs::write(site_dir.join("config.json"), config).unwrap();
1626
1627        let result = check_config_schema(&site_dir);
1628        assert!(
1629            result.passed,
1630            "zero-chunk encrypted config should match Rust/worker validators: {:?}",
1631            result.details
1632        );
1633    }
1634
1635    #[test]
1636    fn test_verify_unencrypted_site() {
1637        let temp = TempDir::new().unwrap();
1638        let site_dir = temp.path().join("site");
1639
1640        // Copy the unencrypted fixture to temp directory
1641        copy_fixture("unencrypted", &site_dir).unwrap();
1642
1643        let result = verify_bundle(&site_dir, true).unwrap();
1644        assert!(result.checks.config_schema.passed);
1645        assert!(result.checks.payload_manifest.passed);
1646        assert_eq!(result.status, "valid");
1647    }
1648
1649    #[test]
1650    fn test_verify_missing_required_files() {
1651        let temp = TempDir::new().unwrap();
1652        let site_dir = temp.path().join("site");
1653
1654        // Copy the missing_required_no_viewer fixture (missing viewer.js)
1655        copy_fixture("missing_required_no_viewer", &site_dir).unwrap();
1656
1657        let result = verify_bundle(&site_dir, false).unwrap();
1658        assert_eq!(result.status, "invalid");
1659        assert!(!result.checks.required_files.passed);
1660    }
1661
1662    #[test]
1663    fn test_verify_rejects_required_file_replaced_by_directory() {
1664        let temp = TempDir::new().unwrap();
1665        let site_dir = temp.path().join("site");
1666        let viewer_backup = temp.path().join("viewer.js.backup");
1667
1668        copy_fixture("valid", &site_dir).unwrap();
1669        fs::rename(site_dir.join("viewer.js"), &viewer_backup).unwrap();
1670        fs::create_dir(site_dir.join("viewer.js")).unwrap();
1671
1672        let mut manifest: IntegrityManifest = serde_json::from_reader(BufReader::new(
1673            File::open(site_dir.join("integrity.json")).unwrap(),
1674        ))
1675        .unwrap();
1676        manifest.files.remove("viewer.js");
1677        fs::write(
1678            site_dir.join("integrity.json"),
1679            serde_json::to_string(&manifest).unwrap(),
1680        )
1681        .unwrap();
1682
1683        let result = verify_bundle(&site_dir, false).unwrap();
1684        assert_eq!(result.status, "invalid");
1685        assert!(!result.checks.required_files.passed);
1686        assert!(
1687            result
1688                .checks
1689                .required_files
1690                .details
1691                .as_ref()
1692                .map(|details| details.contains("viewer.js (must be a regular file)"))
1693                .unwrap_or(false),
1694            "required file directories should be rejected: {:?}",
1695            result.checks.required_files.details
1696        );
1697    }
1698
1699    #[test]
1700    #[cfg(unix)]
1701    fn test_required_files_reject_symlinked_regular_file() -> Result<()> {
1702        use std::os::unix::fs::symlink;
1703
1704        let temp = TempDir::new()?;
1705        let site_dir = temp.path().join("site");
1706        let outside = TempDir::new()?;
1707        copy_fixture("valid", &site_dir)?;
1708        fs::rename(
1709            site_dir.join("viewer.js"),
1710            outside.path().join("viewer-original.js"),
1711        )?;
1712        fs::write(outside.path().join("viewer.js"), "outside viewer")?;
1713        symlink(outside.path().join("viewer.js"), site_dir.join("viewer.js"))?;
1714
1715        let result = check_required_files(&site_dir);
1716
1717        if result.passed {
1718            anyhow::bail!("symlinked required file was accepted");
1719        }
1720        match result.details.as_deref() {
1721            Some(details) if details.contains("viewer.js (must be a regular file)") => Ok(()),
1722            details => anyhow::bail!(
1723                "symlinked required files should be rejected; got details: {:?}",
1724                details
1725            ),
1726        }
1727    }
1728
1729    #[test]
1730    fn test_verify_invalid_config() {
1731        let temp = TempDir::new().unwrap();
1732        let site_dir = temp.path().join("site");
1733
1734        // Copy valid fixture then overwrite config with invalid one
1735        copy_fixture("valid", &site_dir).unwrap();
1736
1737        // Write invalid config
1738        fs::write(
1739            site_dir.join("config.json"),
1740            r#"{"version": 2, "export_id": "invalid"}"#,
1741        )
1742        .unwrap();
1743
1744        let result = verify_bundle(&site_dir, false).unwrap();
1745        assert!(!result.checks.config_schema.passed);
1746    }
1747
1748    #[test]
1749    fn test_verify_rejects_unsupported_encrypted_compression() {
1750        for compression in ["zstd", "none"] {
1751            let temp = TempDir::new().unwrap();
1752            let site_dir = temp.path().join("site");
1753
1754            copy_fixture("valid", &site_dir).unwrap();
1755            let config_path = site_dir.join("config.json");
1756            let mut config: Value =
1757                serde_json::from_str(&fs::read_to_string(&config_path).unwrap()).unwrap();
1758            config["compression"] = Value::String(compression.to_string());
1759            fs::write(&config_path, serde_json::to_string_pretty(&config).unwrap()).unwrap();
1760
1761            let result = check_config_schema(&site_dir);
1762
1763            assert!(
1764                !result.passed,
1765                "{compression} should fail schema validation"
1766            );
1767            let details = result.details.unwrap_or_default();
1768            assert!(
1769                details.contains("supports only deflate") && details.contains(compression),
1770                "unexpected validation details for {compression}: {details}"
1771            );
1772        }
1773    }
1774
1775    #[test]
1776    fn test_verify_rejects_unsupported_encrypted_schema_version() {
1777        let temp = TempDir::new().unwrap();
1778        let site_dir = temp.path().join("site");
1779
1780        copy_fixture("valid", &site_dir).unwrap();
1781        let config_path = site_dir.join("config.json");
1782        let mut config: Value =
1783            serde_json::from_str(&fs::read_to_string(&config_path).unwrap()).unwrap();
1784        config["version"] = Value::from(1);
1785        fs::write(&config_path, serde_json::to_string_pretty(&config).unwrap()).unwrap();
1786
1787        let result = check_config_schema(&site_dir);
1788
1789        assert!(!result.passed, "unsupported schema version should fail");
1790        let details = result.details.unwrap_or_default();
1791        assert!(
1792            details.contains("version must be 2") && details.contains("got 1"),
1793            "unexpected validation details: {details}"
1794        );
1795    }
1796
1797    #[test]
1798    fn test_verify_rejects_unknown_config_fields() {
1799        let temp = TempDir::new().unwrap();
1800        let site_dir = temp.path().join("site");
1801
1802        copy_fixture("valid", &site_dir).unwrap();
1803        fs::write(
1804            site_dir.join("config.json"),
1805            r#"{
1806                "encrypted": false,
1807                "version": "1.0",
1808                "payload": {
1809                    "path": "payload/data.sqlite",
1810                    "format": "sqlite"
1811                },
1812                "totally_unknown_field": 123
1813            }"#,
1814        )
1815        .unwrap();
1816
1817        let result = verify_bundle(&site_dir, false).unwrap();
1818        assert!(!result.checks.config_schema.passed);
1819        assert!(
1820            result
1821                .checks
1822                .config_schema
1823                .details
1824                .as_ref()
1825                .map(|details| details.contains("unknown field"))
1826                .unwrap_or(false),
1827            "unknown config fields should fail schema validation: {:?}",
1828            result.checks.config_schema.details
1829        );
1830    }
1831
1832    #[test]
1833    fn test_verify_secret_leakage() {
1834        let temp = TempDir::new().unwrap();
1835        let site_dir = temp.path().join("site");
1836
1837        // Copy the secret_leak fixture (contains recovery-secret.txt)
1838        copy_fixture("secret_leak", &site_dir).unwrap();
1839
1840        let result = verify_bundle(&site_dir, false).unwrap();
1841        assert!(!result.checks.no_secrets_in_site.passed);
1842    }
1843
1844    #[test]
1845    fn test_check_no_secrets_flags_nested_config_secret_key_with_whitespace() {
1846        let temp = TempDir::new().unwrap();
1847        let site_dir = temp.path().join("site");
1848        fs::create_dir_all(&site_dir).unwrap();
1849        fs::write(
1850            site_dir.join("config.json"),
1851            r#"{
1852                "encrypted": false,
1853                "version": "1.0",
1854                "payload": { "path": "payload/data.sqlite", "format": "sqlite" },
1855                "metadata": { "secret" : "leaked" }
1856            }"#,
1857        )
1858        .unwrap();
1859
1860        let result = check_no_secrets(&site_dir);
1861        assert!(!result.passed);
1862        assert!(
1863            result
1864                .details
1865                .as_ref()
1866                .map(|details| {
1867                    details.contains(
1868                        "config.json contains forbidden field: secret field at metadata.secret",
1869                    )
1870                })
1871                .unwrap_or(false),
1872            "nested secret key with whitespace should be detected: {:?}",
1873            result.details
1874        );
1875    }
1876
1877    #[test]
1878    fn test_check_no_secrets_flags_forbidden_config_key_inside_array() {
1879        let temp = TempDir::new().unwrap();
1880        let site_dir = temp.path().join("site");
1881        fs::create_dir_all(&site_dir).unwrap();
1882        fs::write(
1883            site_dir.join("config.json"),
1884            r#"{
1885                "encrypted": false,
1886                "version": "1.0",
1887                "payload": { "path": "payload/data.sqlite", "format": "sqlite" },
1888                "metadata": [{ "private_key" : "leaked" }]
1889            }"#,
1890        )
1891        .unwrap();
1892
1893        let result = check_no_secrets(&site_dir);
1894        assert!(!result.passed);
1895        assert!(
1896            result
1897                .details
1898                .as_ref()
1899                .map(|details| {
1900                    details.contains(
1901                        "config.json contains forbidden field: private_key field at metadata[0].private_key",
1902                    )
1903                })
1904                .unwrap_or(false),
1905            "forbidden key inside arrays should be detected: {:?}",
1906            result.details
1907        );
1908    }
1909
1910    #[test]
1911    #[cfg(unix)]
1912    fn test_check_no_secrets_does_not_follow_symlinked_directories() {
1913        use std::os::unix::fs::symlink;
1914
1915        let temp = TempDir::new().unwrap();
1916        let site_dir = temp.path().join("site");
1917        let outside_dir = temp.path().join("outside");
1918        fs::create_dir_all(&site_dir).unwrap();
1919        fs::create_dir_all(outside_dir.join("private")).unwrap();
1920        fs::write(outside_dir.join("private/recovery-secret.txt"), "secret").unwrap();
1921        symlink(&outside_dir, site_dir.join("linked-assets")).unwrap();
1922
1923        let result = check_no_secrets(&site_dir);
1924        assert!(
1925            result.passed,
1926            "symlink targets outside site/ should not be scanned as in-tree secrets: {:?}",
1927            result.details
1928        );
1929    }
1930
1931    #[test]
1932    #[cfg(unix)]
1933    fn test_check_no_secrets_flags_secret_named_symlink_without_recursing() {
1934        use std::os::unix::fs::symlink;
1935
1936        let temp = TempDir::new().unwrap();
1937        let site_dir = temp.path().join("site");
1938        let benign_dir = temp.path().join("benign");
1939        fs::create_dir_all(site_dir.join("nested")).unwrap();
1940        fs::create_dir_all(&benign_dir).unwrap();
1941        symlink(&benign_dir, site_dir.join("nested/private")).unwrap();
1942
1943        let result = check_no_secrets(&site_dir);
1944        assert!(!result.passed);
1945        assert!(
1946            result
1947                .details
1948                .as_ref()
1949                .map(|details| {
1950                    details.contains("Secret directory found in site subdirectory: nested/private/")
1951                })
1952                .unwrap_or(false),
1953            "secret-named symlink should still be reported: {:?}",
1954            result.details
1955        );
1956    }
1957
1958    #[test]
1959    #[cfg(unix)]
1960    fn test_check_no_secrets_flags_top_level_secret_file_broken_symlink() {
1961        use std::os::unix::fs::symlink;
1962
1963        let temp = TempDir::new().unwrap();
1964        let site_dir = temp.path().join("site");
1965        fs::create_dir_all(&site_dir).unwrap();
1966        symlink(
1967            temp.path().join("missing-recovery-secret"),
1968            site_dir.join("recovery-secret.txt"),
1969        )
1970        .unwrap();
1971
1972        let result = check_no_secrets(&site_dir);
1973        assert!(!result.passed);
1974        assert!(
1975            result
1976                .details
1977                .as_ref()
1978                .map(|details| details.contains("Secret file found in site/: recovery-secret.txt"))
1979                .unwrap_or(false),
1980            "top-level dangling secret symlink should still be reported: {:?}",
1981            result.details
1982        );
1983    }
1984
1985    #[test]
1986    #[cfg(unix)]
1987    fn test_check_no_secrets_flags_top_level_secret_dir_broken_symlink() {
1988        use std::os::unix::fs::symlink;
1989
1990        let temp = TempDir::new().unwrap();
1991        let site_dir = temp.path().join("site");
1992        fs::create_dir_all(&site_dir).unwrap();
1993        symlink(
1994            temp.path().join("missing-private"),
1995            site_dir.join("private"),
1996        )
1997        .unwrap();
1998
1999        let result = check_no_secrets(&site_dir);
2000        assert!(!result.passed);
2001        assert!(
2002            result
2003                .details
2004                .as_ref()
2005                .map(|details| details.contains("Secret directory found in site/: private/"))
2006                .unwrap_or(false),
2007            "top-level dangling private symlink should still be reported: {:?}",
2008            result.details
2009        );
2010    }
2011
2012    #[test]
2013    fn test_verify_with_integrity() {
2014        let temp = TempDir::new().unwrap();
2015        let site_dir = temp.path().join("site");
2016
2017        // Copy valid fixture
2018        copy_fixture("valid", &site_dir).unwrap();
2019
2020        // Create integrity.json
2021        let mut files = BTreeMap::new();
2022        for file in REQUIRED_FILES {
2023            let hash = compute_file_hash(&site_dir.join(file)).unwrap();
2024            let size = fs::metadata(site_dir.join(file)).unwrap().len();
2025            files.insert(file.to_string(), IntegrityEntry { sha256: hash, size });
2026        }
2027        // Add payload chunk
2028        let chunk_hash = compute_file_hash(&site_dir.join("payload/chunk-00000.bin")).unwrap();
2029        let chunk_size = fs::metadata(site_dir.join("payload/chunk-00000.bin"))
2030            .unwrap()
2031            .len();
2032        files.insert(
2033            "payload/chunk-00000.bin".to_string(),
2034            IntegrityEntry {
2035                sha256: chunk_hash,
2036                size: chunk_size,
2037            },
2038        );
2039
2040        let manifest = IntegrityManifest {
2041            version: 1,
2042            generated_at: "2024-01-01T00:00:00Z".to_string(),
2043            files,
2044        };
2045        fs::write(
2046            site_dir.join("integrity.json"),
2047            serde_json::to_string_pretty(&manifest).unwrap(),
2048        )
2049        .unwrap();
2050
2051        let result = verify_bundle(&site_dir, false).unwrap();
2052        assert!(result.checks.integrity.passed);
2053    }
2054
2055    #[test]
2056    fn test_verify_integrity_mismatch() {
2057        let temp = TempDir::new().unwrap();
2058        let site_dir = temp.path().join("site");
2059
2060        // Copy valid fixture
2061        copy_fixture("valid", &site_dir).unwrap();
2062
2063        // Create integrity.json with wrong hash
2064        let mut files = BTreeMap::new();
2065        files.insert(
2066            "index.html".to_string(),
2067            IntegrityEntry {
2068                sha256: "0000000000000000000000000000000000000000000000000000000000000000"
2069                    .to_string(),
2070                size: 10,
2071            },
2072        );
2073
2074        let manifest = IntegrityManifest {
2075            version: 1,
2076            generated_at: "2024-01-01T00:00:00Z".to_string(),
2077            files,
2078        };
2079        fs::write(
2080            site_dir.join("integrity.json"),
2081            serde_json::to_string_pretty(&manifest).unwrap(),
2082        )
2083        .unwrap();
2084
2085        let result = verify_bundle(&site_dir, false).unwrap();
2086        assert!(!result.checks.integrity.passed);
2087        let details = result.checks.integrity.details.as_ref().unwrap();
2088        assert!(
2089            details.contains("Size mismatch") || details.contains("Hash mismatch"),
2090            "expected size or hash mismatch, got: {details}"
2091        );
2092    }
2093
2094    #[test]
2095    fn test_resolve_site_dir() {
2096        let temp = TempDir::new().unwrap();
2097
2098        // Test with site/ subdirectory
2099        let site_dir = temp.path().join("site");
2100        fs::create_dir_all(&site_dir).unwrap();
2101
2102        let resolved = crate::pages::resolve_site_dir(temp.path()).unwrap();
2103        assert!(resolved.ends_with("site"));
2104
2105        // Test with direct path
2106        let resolved_direct = crate::pages::resolve_site_dir(&site_dir).unwrap();
2107        assert_eq!(resolved_direct, site_dir);
2108    }
2109
2110    #[test]
2111    #[cfg(unix)]
2112    fn test_resolve_site_dir_rejects_symlinked_site_directory() {
2113        use std::os::unix::fs::symlink;
2114
2115        let bundle_root = TempDir::new().unwrap();
2116        let outside = TempDir::new().unwrap();
2117        let outside_site = outside.path().join("site");
2118        fs::create_dir_all(&outside_site).unwrap();
2119        fs::write(outside_site.join("index.html"), "<html></html>").unwrap();
2120        symlink(&outside_site, bundle_root.path().join("site")).unwrap();
2121
2122        let err = crate::pages::resolve_site_dir(bundle_root.path())
2123            .unwrap_err()
2124            .to_string();
2125        assert!(err.contains("must not be a symlink"));
2126
2127        let direct_err = crate::pages::resolve_site_dir(&bundle_root.path().join("site"))
2128            .unwrap_err()
2129            .to_string();
2130        assert!(direct_err.contains("must not be a symlink"));
2131    }
2132
2133    #[test]
2134    fn test_chunk_size_limit() {
2135        let temp = TempDir::new().unwrap();
2136        let site_dir = temp.path();
2137        let payload_dir = site_dir.join("payload");
2138        fs::create_dir_all(&payload_dir).unwrap();
2139
2140        // Create config.json for encrypted archive (required by check_size_limits)
2141        let config = r#"{
2142          "version": 2,
2143          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2144          "base_nonce": "AAAAAAAAAAAAAAAA",
2145          "compression": "deflate",
2146          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2147          "payload": {
2148            "chunk_size": 1024,
2149            "chunk_count": 1,
2150            "total_compressed_size": 14,
2151            "total_plaintext_size": 100,
2152            "files": ["payload/chunk-00000.bin"]
2153          },
2154          "key_slots": [{
2155            "id": 0,
2156            "slot_type": "password",
2157            "kdf": "argon2id",
2158            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2159            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2160            "nonce": "AAAAAAAAAAAAAAAA",
2161            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2162          }]
2163        }"#;
2164        fs::write(site_dir.join("config.json"), config).unwrap();
2165
2166        // Create a small file (should pass)
2167        fs::write(payload_dir.join("chunk-00000.bin"), "small").unwrap();
2168
2169        let result = check_size_limits(site_dir);
2170        assert!(result.passed);
2171    }
2172
2173    #[test]
2174    fn test_payload_manifest_rejects_unexpected_high_chunk_index() {
2175        let temp = TempDir::new().unwrap();
2176        let site_dir = temp.path();
2177        let payload_dir = site_dir.join("payload");
2178        fs::create_dir_all(&payload_dir).unwrap();
2179
2180        let config = r#"{
2181          "version": 2,
2182          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2183          "base_nonce": "AAAAAAAAAAAAAAAA",
2184          "compression": "deflate",
2185          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2186          "payload": {
2187            "chunk_size": 1024,
2188            "chunk_count": 1,
2189            "total_compressed_size": 14,
2190            "total_plaintext_size": 100,
2191            "files": ["payload/chunk-00000.bin"]
2192          },
2193          "key_slots": [{
2194            "id": 0,
2195            "slot_type": "password",
2196            "kdf": "argon2id",
2197            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2198            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2199            "nonce": "AAAAAAAAAAAAAAAA",
2200            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2201          }]
2202        }"#;
2203        fs::write(site_dir.join("config.json"), config).unwrap();
2204
2205        fs::write(payload_dir.join("chunk-00000.bin"), "small").unwrap();
2206        fs::write(payload_dir.join("chunk-99999.bin"), "unexpected").unwrap();
2207
2208        let result = check_payload_manifest(site_dir);
2209        assert!(!result.passed);
2210        let details = result.details.unwrap_or_default();
2211        assert!(details.contains("Unexpected chunk file index: chunk-99999.bin"));
2212    }
2213
2214    #[test]
2215    fn test_payload_manifest_rejects_non_file_chunk_entry() {
2216        let temp = TempDir::new().unwrap();
2217        let site_dir = temp.path();
2218        let payload_dir = site_dir.join("payload");
2219        fs::create_dir_all(&payload_dir).unwrap();
2220
2221        let config = r#"{
2222          "version": 2,
2223          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2224          "base_nonce": "AAAAAAAAAAAAAAAA",
2225          "compression": "deflate",
2226          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2227          "payload": {
2228            "chunk_size": 1024,
2229            "chunk_count": 1,
2230            "total_compressed_size": 14,
2231            "total_plaintext_size": 100,
2232            "files": ["payload/chunk-00000.bin"]
2233          },
2234          "key_slots": [{
2235            "id": 0,
2236            "slot_type": "password",
2237            "kdf": "argon2id",
2238            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2239            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2240            "nonce": "AAAAAAAAAAAAAAAA",
2241            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2242          }]
2243        }"#;
2244        fs::write(site_dir.join("config.json"), config).unwrap();
2245        fs::create_dir_all(payload_dir.join("chunk-00000.bin")).unwrap();
2246
2247        let result = check_payload_manifest(site_dir);
2248        assert!(!result.passed);
2249        assert!(
2250            result
2251                .details
2252                .as_ref()
2253                .map(|d| d.contains("payload/chunk-00000.bin must be a regular file"))
2254                .unwrap_or(false)
2255        );
2256    }
2257
2258    #[test]
2259    fn test_payload_manifest_rejects_malformed_chunk_filename() {
2260        let temp = TempDir::new().unwrap();
2261        let site_dir = temp.path();
2262        let payload_dir = site_dir.join("payload");
2263        fs::create_dir_all(&payload_dir).unwrap();
2264
2265        let config = r#"{
2266          "version": 2,
2267          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2268          "base_nonce": "AAAAAAAAAAAAAAAA",
2269          "compression": "deflate",
2270          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2271          "payload": {
2272            "chunk_size": 1024,
2273            "chunk_count": 1,
2274            "total_compressed_size": 14,
2275            "total_plaintext_size": 100,
2276            "files": ["payload/chunk-00000.bin"]
2277          },
2278          "key_slots": [{
2279            "id": 0,
2280            "slot_type": "password",
2281            "kdf": "argon2id",
2282            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2283            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2284            "nonce": "AAAAAAAAAAAAAAAA",
2285            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2286          }]
2287        }"#;
2288        fs::write(site_dir.join("config.json"), config).unwrap();
2289        fs::write(payload_dir.join("chunk-00000.bin"), "small").unwrap();
2290        fs::write(payload_dir.join("chunk-1.bin"), "malformed").unwrap();
2291
2292        let result = check_payload_manifest(site_dir);
2293        assert!(!result.passed);
2294        assert!(
2295            result
2296                .details
2297                .as_ref()
2298                .map(|d| d.contains("Malformed chunk filename: chunk-1.bin"))
2299                .unwrap_or(false)
2300        );
2301    }
2302
2303    #[test]
2304    fn test_payload_manifest_treats_six_digit_chunk_name_as_unexpected_not_malformed() {
2305        let temp = TempDir::new().unwrap();
2306        let site_dir = temp.path();
2307        let payload_dir = site_dir.join("payload");
2308        fs::create_dir_all(&payload_dir).unwrap();
2309
2310        let config = r#"{
2311          "version": 2,
2312          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2313          "base_nonce": "AAAAAAAAAAAAAAAA",
2314          "compression": "deflate",
2315          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2316          "payload": {
2317            "chunk_size": 1024,
2318            "chunk_count": 1,
2319            "total_compressed_size": 14,
2320            "total_plaintext_size": 100,
2321            "files": ["payload/chunk-00000.bin"]
2322          },
2323          "key_slots": [{
2324            "id": 0,
2325            "slot_type": "password",
2326            "kdf": "argon2id",
2327            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2328            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2329            "nonce": "AAAAAAAAAAAAAAAA",
2330            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2331          }]
2332        }"#;
2333        fs::write(site_dir.join("config.json"), config).unwrap();
2334        fs::write(payload_dir.join("chunk-00000.bin"), "small").unwrap();
2335        fs::write(payload_dir.join("chunk-100000.bin"), "unexpected").unwrap();
2336
2337        let result = check_payload_manifest(site_dir);
2338        assert!(!result.passed);
2339        let details = result.details.unwrap_or_default();
2340        assert!(details.contains("Unexpected chunk file index: chunk-100000.bin"));
2341        assert!(!details.contains("Malformed chunk filename: chunk-100000.bin"));
2342    }
2343
2344    #[test]
2345    fn test_unencrypted_payload_must_be_regular_file() {
2346        let temp = TempDir::new().unwrap();
2347        let site_dir = temp.path();
2348        let payload_dir = site_dir.join("payload");
2349        fs::create_dir_all(&payload_dir).unwrap();
2350        fs::create_dir_all(payload_dir.join("data.sqlite")).unwrap();
2351
2352        let config = r#"{
2353          "encrypted": false,
2354          "version": "1.0",
2355          "payload": {
2356            "path": "payload/data.sqlite",
2357            "format": "sqlite"
2358          }
2359        }"#;
2360        fs::write(site_dir.join("config.json"), config).unwrap();
2361
2362        let manifest_result = check_payload_manifest(site_dir);
2363        assert!(!manifest_result.passed);
2364        assert!(
2365            manifest_result
2366                .details
2367                .as_ref()
2368                .map(|d| d.contains("payload/data.sqlite must be a regular file"))
2369                .unwrap_or(false)
2370        );
2371
2372        let size_result = check_size_limits(site_dir);
2373        assert!(!size_result.passed);
2374        assert!(
2375            size_result
2376                .details
2377                .as_ref()
2378                .map(|d| d.contains("payload/data.sqlite must be a regular file"))
2379                .unwrap_or(false)
2380        );
2381    }
2382
2383    #[test]
2384    fn test_size_limits_rejects_non_file_chunk_entry() {
2385        let temp = TempDir::new().unwrap();
2386        let site_dir = temp.path();
2387        let payload_dir = site_dir.join("payload");
2388        fs::create_dir_all(&payload_dir).unwrap();
2389
2390        let config = r#"{
2391          "version": 2,
2392          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2393          "base_nonce": "AAAAAAAAAAAAAAAA",
2394          "compression": "deflate",
2395          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2396          "payload": {
2397            "chunk_size": 1024,
2398            "chunk_count": 1,
2399            "total_compressed_size": 14,
2400            "total_plaintext_size": 100,
2401            "files": ["payload/chunk-00000.bin"]
2402          },
2403          "key_slots": [{
2404            "id": 0,
2405            "slot_type": "password",
2406            "kdf": "argon2id",
2407            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2408            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2409            "nonce": "AAAAAAAAAAAAAAAA",
2410            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2411          }]
2412        }"#;
2413        fs::write(site_dir.join("config.json"), config).unwrap();
2414        fs::create_dir_all(payload_dir.join("chunk-00000.bin")).unwrap();
2415
2416        let result = check_size_limits(site_dir);
2417        assert!(!result.passed);
2418        assert!(
2419            result
2420                .details
2421                .as_ref()
2422                .map(|d| d.contains("chunk-00000.bin must be a regular file"))
2423                .unwrap_or(false)
2424        );
2425    }
2426
2427    #[test]
2428    #[cfg(unix)]
2429    fn test_size_limits_rejects_symlinked_chunk() {
2430        use std::os::unix::fs::symlink;
2431
2432        let temp = TempDir::new().unwrap();
2433        let outside = TempDir::new().unwrap();
2434        let site_dir = temp.path();
2435        let payload_dir = site_dir.join("payload");
2436        fs::create_dir_all(&payload_dir).unwrap();
2437
2438        let config = r#"{
2439          "version": 2,
2440          "export_id": "AAAAAAAAAAAAAAAAAAAAAA==",
2441          "base_nonce": "AAAAAAAAAAAAAAAA",
2442          "compression": "deflate",
2443          "kdf_defaults": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 },
2444          "payload": {
2445            "chunk_size": 1024,
2446            "chunk_count": 1,
2447            "total_compressed_size": 14,
2448            "total_plaintext_size": 100,
2449            "files": ["payload/chunk-00000.bin"]
2450          },
2451          "key_slots": [{
2452            "id": 0,
2453            "slot_type": "password",
2454            "kdf": "argon2id",
2455            "salt": "AAAAAAAAAAAAAAAAAAAAAA==",
2456            "wrapped_dek": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
2457            "nonce": "AAAAAAAAAAAAAAAA",
2458            "argon2_params": { "memory_kb": 65536, "iterations": 3, "parallelism": 4 }
2459          }]
2460        }"#;
2461        fs::write(site_dir.join("config.json"), config).unwrap();
2462
2463        fs::write(outside.path().join("chunk-00000.bin"), "external").unwrap();
2464        symlink(
2465            outside.path().join("chunk-00000.bin"),
2466            payload_dir.join("chunk-00000.bin"),
2467        )
2468        .unwrap();
2469
2470        let result = check_size_limits(site_dir);
2471        assert!(!result.passed);
2472        assert!(
2473            result
2474                .details
2475                .as_ref()
2476                .map(|d| d.contains("must not be a symlink"))
2477                .unwrap_or(false)
2478        );
2479    }
2480
2481    #[test]
2482    fn test_integrity_path_traversal_blocked() {
2483        use std::collections::BTreeMap;
2484
2485        let temp = TempDir::new().unwrap();
2486        let site_dir = temp.path();
2487
2488        // Create integrity.json with path traversal attempt
2489        let mut files = BTreeMap::new();
2490        files.insert(
2491            "../../../etc/passwd".to_string(),
2492            crate::pages::bundle::IntegrityEntry {
2493                sha256: "deadbeef".repeat(8),
2494                size: 100,
2495            },
2496        );
2497        let manifest = IntegrityManifest {
2498            version: 1,
2499            generated_at: "2025-01-01T00:00:00Z".to_string(),
2500            files,
2501        };
2502        let manifest_json = serde_json::to_string(&manifest).unwrap();
2503        fs::write(site_dir.join("integrity.json"), manifest_json).unwrap();
2504
2505        // Verify the check catches the path traversal
2506        let result = check_integrity(site_dir, false);
2507        assert!(!result.passed, "Path traversal should be blocked");
2508        assert!(
2509            result
2510                .details
2511                .as_ref()
2512                .map(|d| d.contains("security violation"))
2513                .unwrap_or(false),
2514            "Should mention security violation"
2515        );
2516    }
2517
2518    #[test]
2519    fn test_integrity_absolute_path_blocked() {
2520        use std::collections::BTreeMap;
2521
2522        let temp = TempDir::new().unwrap();
2523        let site_dir = temp.path();
2524
2525        // Create integrity.json with absolute path
2526        let mut files = BTreeMap::new();
2527        files.insert(
2528            "/etc/passwd".to_string(),
2529            crate::pages::bundle::IntegrityEntry {
2530                sha256: "deadbeef".repeat(8),
2531                size: 100,
2532            },
2533        );
2534        let manifest = IntegrityManifest {
2535            version: 1,
2536            generated_at: "2025-01-01T00:00:00Z".to_string(),
2537            files,
2538        };
2539        let manifest_json = serde_json::to_string(&manifest).unwrap();
2540        fs::write(site_dir.join("integrity.json"), manifest_json).unwrap();
2541
2542        // Verify the check catches the absolute path
2543        let result = check_integrity(site_dir, false);
2544        assert!(!result.passed, "Absolute path should be blocked");
2545        assert!(
2546            result
2547                .details
2548                .as_ref()
2549                .map(|d| d.contains("security violation"))
2550                .unwrap_or(false),
2551            "Should mention security violation"
2552        );
2553    }
2554
2555    #[test]
2556    fn test_integrity_url_encoded_traversal_blocked_single() {
2557        assert_integrity_path_blocked("%2e%2e/%2e%2e/etc/passwd");
2558    }
2559
2560    #[test]
2561    fn test_integrity_url_encoded_traversal_blocked_double() {
2562        assert_integrity_path_blocked("%252e%252e/%252e%252e/etc/passwd");
2563    }
2564
2565    #[test]
2566    fn test_integrity_url_encoded_traversal_blocked_mixed() {
2567        assert_integrity_path_blocked("%2e./etc/passwd");
2568        assert_integrity_path_blocked(".%2e/etc/passwd");
2569        assert_integrity_path_blocked("..%2fetc/passwd");
2570    }
2571
2572    #[test]
2573    fn test_integrity_url_encoded_traversal_blocked_uppercase() {
2574        assert_integrity_path_blocked("%2E%2E/%2Fetc/passwd");
2575    }
2576
2577    #[test]
2578    fn test_integrity_url_encoded_traversal_blocked_overlong_utf8() {
2579        assert_integrity_path_blocked("%c0%ae%c0%ae/%c0%ae%c0%ae/etc/passwd");
2580    }
2581
2582    #[test]
2583    fn test_integrity_url_encoded_traversal_blocked_null_byte() {
2584        assert_integrity_path_blocked("valid%00/../etc/passwd");
2585    }
2586
2587    #[test]
2588    fn test_integrity_url_encoded_traversal_blocked_backslash() {
2589        assert_integrity_path_blocked("..\\..\\etc\\passwd");
2590        assert_integrity_path_blocked("..%5c..%5cetc%5cpasswd");
2591    }
2592
2593    #[test]
2594    fn test_integrity_url_encoded_traversal_blocked_separator_confusion() {
2595        assert_integrity_path_blocked(r"..\/..\/etc\/passwd");
2596    }
2597
2598    // --- Unicode normalization attack tests ---
2599
2600    #[test]
2601    fn test_integrity_unicode_fullwidth_dots_blocked() {
2602        // U+FF0E FULLWIDTH FULL STOP looks like '.' but is a different codepoint.
2603        // Two fullwidth dots form a visual ".." that bypasses naive ASCII checks.
2604        assert_integrity_path_blocked("\u{FF0E}\u{FF0E}/etc/passwd");
2605    }
2606
2607    #[test]
2608    fn test_integrity_unicode_fullwidth_slash_blocked() {
2609        // U+FF0F FULLWIDTH SOLIDUS looks like '/' but is a different codepoint.
2610        assert_integrity_path_blocked("payload\u{FF0F}..\\..\\etc\\passwd");
2611    }
2612
2613    #[test]
2614    fn test_integrity_unicode_fullwidth_backslash_blocked() {
2615        // U+FF3C FULLWIDTH REVERSE SOLIDUS looks like '\' but is a different codepoint.
2616        assert_integrity_path_blocked("payload\u{FF3C}..\\..\\etc\\passwd");
2617    }
2618
2619    #[test]
2620    fn test_integrity_unicode_small_full_stop_blocked() {
2621        // U+FE52 SMALL FULL STOP - a compatibility variant of '.'
2622        assert_integrity_path_blocked("\u{FE52}\u{FE52}/etc/passwd");
2623    }
2624
2625    #[test]
2626    fn test_integrity_unicode_one_dot_leader_blocked() {
2627        // U+2024 ONE DOT LEADER - looks nearly identical to '.'
2628        assert_integrity_path_blocked("\u{2024}\u{2024}/etc/passwd");
2629    }
2630
2631    #[test]
2632    fn test_integrity_unicode_halfwidth_ideographic_full_stop_blocked() {
2633        // U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP
2634        assert_integrity_path_blocked("\u{FF61}\u{FF61}/etc/passwd");
2635    }
2636
2637    #[test]
2638    fn test_integrity_unicode_mixed_fullwidth_and_ascii_blocked() {
2639        // Mix fullwidth and ASCII dots — the fullwidth char alone should trigger
2640        assert_integrity_path_blocked(".\u{FF0E}/etc/passwd");
2641        assert_integrity_path_blocked("\u{FF0E}./etc/passwd");
2642    }
2643
2644    #[test]
2645    fn test_integrity_percent_encoded_unicode_fullwidth_dot_blocked() {
2646        // Percent-encoded UTF-8 for U+FF0E (FULLWIDTH FULL STOP): 0xEF 0xBC 0x8E
2647        assert_integrity_path_blocked("%ef%bc%8e%ef%bc%8e/etc/passwd");
2648    }
2649
2650    // --- Case sensitivity / Windows path tests ---
2651
2652    #[test]
2653    fn test_integrity_windows_drive_letter_blocked() {
2654        assert_integrity_path_blocked("C:\\Windows\\System32\\config\\SAM");
2655    }
2656
2657    #[test]
2658    fn test_integrity_windows_drive_letter_lowercase_blocked() {
2659        assert_integrity_path_blocked("c:\\windows\\system32");
2660    }
2661
2662    #[test]
2663    fn test_integrity_windows_drive_letter_forward_slash_blocked() {
2664        assert_integrity_path_blocked("C:/Windows/System32");
2665    }
2666
2667    #[test]
2668    fn test_integrity_windows_unc_path_blocked() {
2669        // UNC paths start with \\ — should be caught as absolute
2670        assert_integrity_path_blocked("\\\\server\\share\\file.txt");
2671    }
2672
2673    // --- Symlink traversal tests ---
2674
2675    #[test]
2676    #[cfg(unix)]
2677    fn test_integrity_symlink_traversal_blocked() {
2678        use std::os::unix::fs::symlink;
2679
2680        let temp = TempDir::new().unwrap();
2681        let site_dir = temp.path();
2682
2683        // Create a target file outside the site directory
2684        let outside_dir = TempDir::new().unwrap();
2685        let secret_file = outside_dir.path().join("secret.txt");
2686        fs::write(&secret_file, "sensitive data").unwrap();
2687
2688        // Create a symlink inside the site directory that points outside
2689        let link_path = site_dir.join("evil_link.txt");
2690        symlink(&secret_file, &link_path).unwrap();
2691
2692        // Compute hash of the file the symlink points to
2693        let hash = compute_file_hash(&link_path).unwrap();
2694        let size = fs::metadata(&link_path).unwrap().len();
2695
2696        let mut files = BTreeMap::new();
2697        files.insert(
2698            "evil_link.txt".to_string(),
2699            IntegrityEntry { sha256: hash, size },
2700        );
2701        let manifest = IntegrityManifest {
2702            version: 1,
2703            generated_at: "2025-01-01T00:00:00Z".to_string(),
2704            files,
2705        };
2706        fs::write(
2707            site_dir.join("integrity.json"),
2708            serde_json::to_string(&manifest).unwrap(),
2709        )
2710        .unwrap();
2711
2712        // The canonicalize check should detect the symlink escapes site_dir
2713        let result = check_integrity(site_dir, false);
2714        assert!(
2715            !result.passed,
2716            "Symlink traversal outside site_dir should be blocked"
2717        );
2718        assert!(
2719            result
2720                .details
2721                .as_ref()
2722                .map(|d| d.contains("security violation"))
2723                .unwrap_or(false),
2724            "Should mention security violation for symlink escape"
2725        );
2726    }
2727
2728    #[test]
2729    #[cfg(unix)]
2730    fn test_integrity_symlink_within_site_dir_allowed() {
2731        use std::os::unix::fs::symlink;
2732
2733        let temp = TempDir::new().unwrap();
2734        let site_dir = temp.path();
2735
2736        // Create a real file inside site_dir
2737        let real_file = site_dir.join("real.txt");
2738        fs::write(&real_file, "legitimate data").unwrap();
2739
2740        // Create a symlink that points to a file inside site_dir
2741        let link_path = site_dir.join("link_to_real.txt");
2742        symlink(&real_file, &link_path).unwrap();
2743
2744        let hash = compute_file_hash(&link_path).unwrap();
2745        let size = fs::metadata(&link_path).unwrap().len();
2746
2747        let mut files = BTreeMap::new();
2748        files.insert(
2749            "link_to_real.txt".to_string(),
2750            IntegrityEntry { sha256: hash, size },
2751        );
2752        // Also include the real file and integrity.json in manifest
2753        let real_hash = compute_file_hash(&real_file).unwrap();
2754        let real_size = fs::metadata(&real_file).unwrap().len();
2755        files.insert(
2756            "real.txt".to_string(),
2757            IntegrityEntry {
2758                sha256: real_hash,
2759                size: real_size,
2760            },
2761        );
2762
2763        let manifest = IntegrityManifest {
2764            version: 1,
2765            generated_at: "2025-01-01T00:00:00Z".to_string(),
2766            files,
2767        };
2768        fs::write(
2769            site_dir.join("integrity.json"),
2770            serde_json::to_string(&manifest).unwrap(),
2771        )
2772        .unwrap();
2773
2774        // Symlink within site_dir should be OK
2775        let result = check_integrity(site_dir, false);
2776        assert!(
2777            result.passed,
2778            "Symlink within site_dir should be allowed: {:?}",
2779            result.details
2780        );
2781    }
2782
2783    // --- False positive tests: legitimate paths should NOT be blocked ---
2784
2785    #[test]
2786    fn test_integrity_legitimate_dotted_version_not_blocked() {
2787        // "v2.1.0" contains dots but they're version numbers, not traversal
2788        let temp = TempDir::new().unwrap();
2789        let site_dir = temp.path();
2790        let target = site_dir.join("assets/v2.1.0/bundle.js");
2791        fs::create_dir_all(target.parent().unwrap()).unwrap();
2792        fs::write(&target, "// bundle").unwrap();
2793
2794        let hash = compute_file_hash(&target).unwrap();
2795        let size = fs::metadata(&target).unwrap().len();
2796        let mut files = BTreeMap::new();
2797        files.insert(
2798            "assets/v2.1.0/bundle.js".to_string(),
2799            IntegrityEntry { sha256: hash, size },
2800        );
2801
2802        let manifest = IntegrityManifest {
2803            version: 1,
2804            generated_at: "2025-01-01T00:00:00Z".to_string(),
2805            files,
2806        };
2807        fs::write(
2808            site_dir.join("integrity.json"),
2809            serde_json::to_string(&manifest).unwrap(),
2810        )
2811        .unwrap();
2812
2813        let result = check_integrity(site_dir, false);
2814        assert!(
2815            result.passed,
2816            "Dotted version path should not be blocked: {:?}",
2817            result.details
2818        );
2819    }
2820
2821    #[test]
2822    fn test_integrity_legitimate_hidden_file_not_blocked() {
2823        // ".nojekyll" starts with a dot — should not be confused with traversal
2824        let temp = TempDir::new().unwrap();
2825        let site_dir = temp.path();
2826        let target = site_dir.join(".nojekyll");
2827        fs::write(&target, "").unwrap();
2828
2829        let hash = compute_file_hash(&target).unwrap();
2830        let size = fs::metadata(&target).unwrap().len();
2831        let mut files = BTreeMap::new();
2832        files.insert(
2833            ".nojekyll".to_string(),
2834            IntegrityEntry { sha256: hash, size },
2835        );
2836
2837        let manifest = IntegrityManifest {
2838            version: 1,
2839            generated_at: "2025-01-01T00:00:00Z".to_string(),
2840            files,
2841        };
2842        fs::write(
2843            site_dir.join("integrity.json"),
2844            serde_json::to_string(&manifest).unwrap(),
2845        )
2846        .unwrap();
2847
2848        let result = check_integrity(site_dir, false);
2849        assert!(
2850            result.passed,
2851            "Hidden file (.nojekyll) should not be blocked: {:?}",
2852            result.details
2853        );
2854    }
2855
2856    #[test]
2857    fn test_integrity_legitimate_payload_subdir_not_blocked() {
2858        let temp = TempDir::new().unwrap();
2859        let site_dir = temp.path();
2860        let target = site_dir.join("payload/data/sessions.db");
2861        fs::create_dir_all(target.parent().unwrap()).unwrap();
2862        fs::write(&target, "sqlite").unwrap();
2863
2864        let hash = compute_file_hash(&target).unwrap();
2865        let size = fs::metadata(&target).unwrap().len();
2866        let mut files = BTreeMap::new();
2867        files.insert(
2868            "payload/data/sessions.db".to_string(),
2869            IntegrityEntry { sha256: hash, size },
2870        );
2871
2872        let manifest = IntegrityManifest {
2873            version: 1,
2874            generated_at: "2025-01-01T00:00:00Z".to_string(),
2875            files,
2876        };
2877        fs::write(
2878            site_dir.join("integrity.json"),
2879            serde_json::to_string(&manifest).unwrap(),
2880        )
2881        .unwrap();
2882
2883        let result = check_integrity(site_dir, false);
2884        assert!(
2885            result.passed,
2886            "Legitimate payload subdirectory should not be blocked: {:?}",
2887            result.details
2888        );
2889    }
2890
2891    #[test]
2892    fn test_integrity_legitimate_hyphens_underscores_not_blocked() {
2893        let temp = TempDir::new().unwrap();
2894        let site_dir = temp.path();
2895        let target = site_dir.join("css/main-v2_final.css");
2896        fs::create_dir_all(target.parent().unwrap()).unwrap();
2897        fs::write(&target, "body{}").unwrap();
2898
2899        let hash = compute_file_hash(&target).unwrap();
2900        let size = fs::metadata(&target).unwrap().len();
2901        let mut files = BTreeMap::new();
2902        files.insert(
2903            "css/main-v2_final.css".to_string(),
2904            IntegrityEntry { sha256: hash, size },
2905        );
2906
2907        let manifest = IntegrityManifest {
2908            version: 1,
2909            generated_at: "2025-01-01T00:00:00Z".to_string(),
2910            files,
2911        };
2912        fs::write(
2913            site_dir.join("integrity.json"),
2914            serde_json::to_string(&manifest).unwrap(),
2915        )
2916        .unwrap();
2917
2918        let result = check_integrity(site_dir, false);
2919        assert!(
2920            result.passed,
2921            "Path with hyphens/underscores should not be blocked: {:?}",
2922            result.details
2923        );
2924    }
2925
2926    // --- Unit tests for helper functions ---
2927
2928    #[test]
2929    fn test_contains_unicode_path_attack_detects_fullwidth_period() {
2930        assert!(contains_unicode_path_attack("\u{FF0E}"));
2931        assert!(contains_unicode_path_attack("foo\u{FF0E}bar"));
2932    }
2933
2934    #[test]
2935    fn test_contains_unicode_path_attack_detects_fullwidth_solidus() {
2936        assert!(contains_unicode_path_attack("\u{FF0F}"));
2937    }
2938
2939    #[test]
2940    fn test_contains_unicode_path_attack_detects_fullwidth_reverse_solidus() {
2941        assert!(contains_unicode_path_attack("\u{FF3C}"));
2942    }
2943
2944    #[test]
2945    fn test_contains_unicode_path_attack_detects_small_full_stop() {
2946        assert!(contains_unicode_path_attack("\u{FE52}"));
2947    }
2948
2949    #[test]
2950    fn test_contains_unicode_path_attack_detects_one_dot_leader() {
2951        assert!(contains_unicode_path_attack("\u{2024}"));
2952    }
2953
2954    #[test]
2955    fn test_contains_unicode_path_attack_allows_ascii() {
2956        assert!(!contains_unicode_path_attack("payload/chunk-00000.bin"));
2957        assert!(!contains_unicode_path_attack("../etc/passwd")); // traversal, but ASCII
2958        assert!(!contains_unicode_path_attack(".nojekyll"));
2959    }
2960
2961    #[test]
2962    fn test_detect_encoded_path_violation_unicode_attack() {
2963        let result = detect_encoded_path_violation("\u{FF0E}\u{FF0E}/etc/passwd");
2964        assert_eq!(result, Some("unicode normalization attack".to_string()));
2965    }
2966
2967    #[test]
2968    fn test_detect_encoded_path_violation_percent_encoded_unicode() {
2969        // %EF%BC%8E = UTF-8 encoding of U+FF0E (FULLWIDTH FULL STOP)
2970        let result = detect_encoded_path_violation("%ef%bc%8e%ef%bc%8e/etc/passwd");
2971        assert_eq!(
2972            result,
2973            Some("url-encoded unicode normalization attack".to_string())
2974        );
2975    }
2976
2977    // --- Additional Unicode normalization attack tests (coding_agent_session_search-13za) ---
2978
2979    #[test]
2980    fn test_integrity_unicode_combining_long_solidus_overlay_blocked() {
2981        // U+0338 COMBINING LONG SOLIDUS OVERLAY - could visually disguise characters
2982        assert_integrity_path_blocked(".\u{0338}./etc/passwd");
2983    }
2984
2985    #[test]
2986    fn test_integrity_unicode_combining_short_solidus_overlay_blocked() {
2987        // U+0337 COMBINING SHORT SOLIDUS OVERLAY
2988        assert_integrity_path_blocked(".\u{0337}./etc/passwd");
2989    }
2990
2991    #[test]
2992    fn test_integrity_unicode_rtl_override_blocked() {
2993        // U+202E RIGHT-TO-LEFT OVERRIDE - can visually reverse path display
2994        // This could make "etc/passwd/../" appear as a safe path when it's actually traversal
2995        assert_integrity_path_blocked("etc/passwd/\u{202E}../");
2996    }
2997
2998    #[test]
2999    fn test_integrity_unicode_ltr_override_blocked() {
3000        // U+202D LEFT-TO-RIGHT OVERRIDE - directional override
3001        assert_integrity_path_blocked("\u{202D}../etc/passwd");
3002    }
3003
3004    #[test]
3005    fn test_integrity_unicode_rtl_embedding_blocked() {
3006        // U+202B RIGHT-TO-LEFT EMBEDDING
3007        assert_integrity_path_blocked("\u{202B}../etc/passwd");
3008    }
3009
3010    #[test]
3011    fn test_integrity_unicode_rtl_isolate_blocked() {
3012        // U+2067 RIGHT-TO-LEFT ISOLATE
3013        assert_integrity_path_blocked("\u{2067}../etc/passwd");
3014    }
3015
3016    #[test]
3017    fn test_integrity_unicode_zero_width_joiner_blocked() {
3018        // U+200D ZERO WIDTH JOINER - invisible character that could split tokens
3019        assert_integrity_path_blocked(".\u{200D}./etc/passwd");
3020    }
3021
3022    #[test]
3023    fn test_integrity_unicode_zero_width_non_joiner_blocked() {
3024        // U+200C ZERO WIDTH NON-JOINER
3025        assert_integrity_path_blocked(".\u{200C}./etc/passwd");
3026    }
3027
3028    #[test]
3029    fn test_integrity_unicode_zero_width_space_blocked() {
3030        // U+200B ZERO WIDTH SPACE - invisible character
3031        assert_integrity_path_blocked("..\u{200B}/etc/passwd");
3032    }
3033
3034    #[test]
3035    fn test_integrity_unicode_bom_blocked() {
3036        // U+FEFF BYTE ORDER MARK (ZERO WIDTH NO-BREAK SPACE)
3037        assert_integrity_path_blocked("\u{FEFF}../etc/passwd");
3038    }
3039
3040    #[test]
3041    fn test_integrity_unicode_fraction_slash_blocked() {
3042        // U+2044 FRACTION SLASH - visually similar to /
3043        assert_integrity_path_blocked("..\u{2044}etc\u{2044}passwd");
3044    }
3045
3046    #[test]
3047    fn test_integrity_unicode_division_slash_blocked() {
3048        // U+2215 DIVISION SLASH - visually similar to /
3049        assert_integrity_path_blocked("..\u{2215}etc\u{2215}passwd");
3050    }
3051
3052    #[test]
3053    fn test_integrity_unicode_big_solidus_blocked() {
3054        // U+29F8 BIG SOLIDUS - another slash look-alike
3055        assert_integrity_path_blocked("..\u{29F8}etc\u{29F8}passwd");
3056    }
3057
3058    #[test]
3059    fn test_integrity_unicode_vai_full_stop_blocked() {
3060        // U+A60E VAI FULL STOP - dot look-alike
3061        assert_integrity_path_blocked("\u{A60E}\u{A60E}/etc/passwd");
3062    }
3063
3064    #[test]
3065    fn test_integrity_unicode_syriac_full_stop_blocked() {
3066        // U+0701 SYRIAC SUPRALINEAR FULL STOP - dot look-alike
3067        assert_integrity_path_blocked("\u{0701}\u{0701}/etc/passwd");
3068    }
3069
3070    // --- NFD/NFC normalization form tests ---
3071
3072    #[test]
3073    fn test_integrity_unicode_nfd_decomposed_not_exploitable() {
3074        // NFD decomposition of certain characters could potentially be exploited
3075        // For example, some characters have canonical decompositions
3076        // This test verifies that legitimate paths with accented chars work
3077        let temp = TempDir::new().unwrap();
3078        let site_dir = temp.path();
3079
3080        // Create a file with an accented filename (NFC form - precomposed)
3081        let target = site_dir.join("café.txt");
3082        fs::write(&target, "coffee").unwrap();
3083
3084        let hash = compute_file_hash(&target).unwrap();
3085        let size = fs::metadata(&target).unwrap().len();
3086        let mut files = BTreeMap::new();
3087        files.insert(
3088            "café.txt".to_string(),
3089            IntegrityEntry { sha256: hash, size },
3090        );
3091
3092        let manifest = IntegrityManifest {
3093            version: 1,
3094            generated_at: "2025-01-01T00:00:00Z".to_string(),
3095            files,
3096        };
3097        fs::write(
3098            site_dir.join("integrity.json"),
3099            serde_json::to_string(&manifest).unwrap(),
3100        )
3101        .unwrap();
3102
3103        // Legitimate accented filenames should be allowed
3104        let result = check_integrity(site_dir, false);
3105        assert!(
3106            result.passed,
3107            "Legitimate accented filename should be allowed: {:?}",
3108            result.details
3109        );
3110    }
3111
3112    // --- Unit tests for extended helper functions ---
3113
3114    #[test]
3115    fn test_contains_unicode_path_attack_detects_combining_overlay() {
3116        assert!(contains_unicode_path_attack("\u{0338}")); // COMBINING LONG SOLIDUS OVERLAY
3117        assert!(contains_unicode_path_attack("\u{0337}")); // COMBINING SHORT SOLIDUS OVERLAY
3118    }
3119
3120    #[test]
3121    fn test_contains_unicode_path_attack_detects_zero_width() {
3122        assert!(contains_unicode_path_attack("\u{200D}")); // ZERO WIDTH JOINER
3123        assert!(contains_unicode_path_attack("\u{200C}")); // ZERO WIDTH NON-JOINER
3124        assert!(contains_unicode_path_attack("\u{200B}")); // ZERO WIDTH SPACE
3125        assert!(contains_unicode_path_attack("\u{FEFF}")); // BOM
3126    }
3127
3128    #[test]
3129    fn test_contains_unicode_path_attack_detects_rtl_overrides() {
3130        assert!(contains_unicode_path_attack("\u{202E}")); // RTL OVERRIDE
3131        assert!(contains_unicode_path_attack("\u{202D}")); // LTR OVERRIDE
3132        assert!(contains_unicode_path_attack("\u{202B}")); // RTL EMBEDDING
3133        assert!(contains_unicode_path_attack("\u{2067}")); // RTL ISOLATE
3134    }
3135
3136    #[test]
3137    fn test_contains_unicode_path_attack_detects_confusable_slashes() {
3138        assert!(contains_unicode_path_attack("\u{2044}")); // FRACTION SLASH
3139        assert!(contains_unicode_path_attack("\u{2215}")); // DIVISION SLASH
3140        assert!(contains_unicode_path_attack("\u{29F8}")); // BIG SOLIDUS
3141    }
3142
3143    #[test]
3144    fn test_contains_unicode_path_attack_detects_confusable_dots() {
3145        assert!(contains_unicode_path_attack("\u{A60E}")); // VAI FULL STOP
3146        assert!(contains_unicode_path_attack("\u{0701}")); // SYRIAC SUPRALINEAR FULL STOP
3147        assert!(contains_unicode_path_attack("\u{0702}")); // SYRIAC SUBLINEAR FULL STOP
3148    }
3149
3150    #[test]
3151    fn test_detect_encoded_path_violation_rtl_override() {
3152        let result = detect_encoded_path_violation("etc/passwd/\u{202E}../");
3153        assert_eq!(result, Some("unicode normalization attack".to_string()));
3154    }
3155
3156    #[test]
3157    fn test_detect_encoded_path_violation_zero_width_joiner() {
3158        let result = detect_encoded_path_violation(".\u{200D}./etc/passwd");
3159        assert_eq!(result, Some("unicode normalization attack".to_string()));
3160    }
3161
3162    #[test]
3163    fn test_detect_encoded_path_violation_fraction_slash() {
3164        let result = detect_encoded_path_violation("..\u{2044}etc\u{2044}passwd");
3165        assert_eq!(result, Some("unicode normalization attack".to_string()));
3166    }
3167}