Skip to main content

bones_core/event/
validate.rs

1//! Event format validation and hardening.
2//!
3//! Three validation levels:
4//!
5//! 1. **Line syntax** — TSJSON shape (8 tab-separated fields), valid UTF-8,
6//!    `event_hash` matches recomputed BLAKE3 hash of fields 1–7.
//! 2. **Schema validation** — Typed payload deserialization per event type.
//!    Unknown fields are preserved (forward compatible). Unknown event types
//!    are reported as [`ValidationErrorKind::UnknownEventType`] errors.
10//! 3. **Semantic validation** — Enum constraint checks (kind, urgency, size,
11//!    state values), item ID format, link target format.
12//!
13//! # Shard-level checks
14//!
//! - Verifies shard files against their manifests (byte length and BLAKE3 file hash).
16//! - Detects truncated event files (incomplete trailing lines).
17//! - Preserves valid events before a corrupt line in the report.
18//!
19//! # Usage
20//!
21//! ```no_run
22//! use std::path::Path;
23//! use bones_core::event::validate::{validate_shard, validate_all};
24//!
25//! // Validate a single shard file
26//! let report = validate_shard(Path::new(".bones/events/2026-01.events"), None);
27//! println!("passed: {}, failed: {}", report.passed, report.failed);
28//! for err in &report.errors {
29//!     println!("  line {}: {:?} — {}", err.line_num, err.kind, err.message);
30//! }
31//!
32//! // Validate all shards in a directory
33//! let reports = validate_all(Path::new(".bones/events"));
34//! ```
35
36use std::fs;
37use std::path::{Path, PathBuf};
38
39use crate::event::parser::{self, ParseError, ParsedLine};
40use crate::shard::ShardManifest;
41
42// ---------------------------------------------------------------------------
43// Maximum payload size (1 MiB)
44// ---------------------------------------------------------------------------
45
/// Upper bound, in bytes, on the JSON data field of a single event line.
///
/// [`validate_event`] rejects a line whose data field is longer than this
/// with [`ValidationErrorKind::OversizedPayload`], so that one excessively
/// large payload cannot be used to bloat the event log.
pub const MAX_PAYLOAD_BYTES: usize = 1024 * 1024; // 1 MiB
52
53// ---------------------------------------------------------------------------
54// ValidationError
55// ---------------------------------------------------------------------------
56
57/// Details about a single validation failure.
58#[derive(Debug, Clone)]
59pub struct ValidationError {
60    /// Line number in the shard file (1-based).
61    pub line_num: usize,
62    /// The category of validation failure.
63    pub kind: ValidationErrorKind,
64    /// Human-readable description of what went wrong.
65    pub message: String,
66    /// The raw line content (truncated to 256 chars if oversized).
67    pub raw_line: Option<String>,
68}
69
/// Classification of a [`ValidationError`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationErrorKind {
    /// `wall_ts_us` could not be parsed as an integer.
    MalformedTimestamp,
    /// The event type is not a recognised `item.<verb>`.
    UnknownEventType,
    /// `item_id` is not a valid bones ID.
    InvalidItemId,
    /// The data field is not valid JSON, or does not fit the schema of the
    /// line's event type.
    InvalidJson,
    /// The JSON data field is larger than [`MAX_PAYLOAD_BYTES`].
    OversizedPayload,
    /// The event line lacks a required field.
    MissingField,
    /// `event_hash` differs from the BLAKE3 hash recomputed from the line.
    HashChainBroken,
    /// The shard file looks truncated (its last line is incomplete).
    TruncatedFile,
    /// The content is not valid UTF-8. Also used by `validate_shard` when the
    /// shard file cannot be read at all.
    InvalidUtf8,
    /// The line does not have the expected number of tab-separated fields.
    BadFieldCount,
    /// `event_hash` is not in the expected format.
    InvalidHashFormat,
    /// `agent` is empty or otherwise invalid.
    InvalidAgent,
    /// `itc` is empty.
    EmptyItc,
    /// One of the parent hashes is not in the expected format.
    InvalidParentHash,
    /// The BLAKE3 hash of the shard file differs from the manifest.
    ManifestMismatch,
    /// The number of events in the shard differs from the manifest.
    ManifestCountMismatch,
    /// The byte length of the shard file differs from the manifest.
    ManifestSizeMismatch,
    /// The shard was produced by a newer, unsupported version of bones.
    UnsupportedVersion,
}
110
111// ---------------------------------------------------------------------------
112// ValidationReport
113// ---------------------------------------------------------------------------
114
115/// Summary report from validating an entire shard.
116#[derive(Debug, Clone)]
117pub struct ValidationReport {
118    /// Number of event lines that passed validation.
119    pub passed: usize,
120    /// Number of lines that failed validation.
121    pub failed: usize,
122    /// Detailed errors for each failure.
123    pub errors: Vec<ValidationError>,
124    /// Path of the shard file that was validated.
125    pub shard_path: PathBuf,
126    /// Whether the file appears truncated (no trailing newline on last line).
127    pub truncated: bool,
128}
129
130impl ValidationReport {
131    /// Returns `true` if the shard passed all validation checks.
132    #[must_use]
133    pub const fn is_ok(&self) -> bool {
134        self.failed == 0 && !self.truncated
135    }
136
137    /// Total lines processed (passed + failed).
138    #[must_use]
139    pub const fn total(&self) -> usize {
140        self.passed + self.failed
141    }
142}
143
144// ---------------------------------------------------------------------------
145// Line-level validation
146// ---------------------------------------------------------------------------
147
148/// Validate a single TSJSON event line.
149///
150/// Performs all three levels of validation:
151/// 1. **Syntax**: correct tab-separated field count, valid timestamp, valid
152///    event hash format and value.
153/// 2. **Schema**: valid JSON payload matching the event type.
154/// 3. **Semantic**: valid item ID, non-empty agent/itc, valid parent hashes,
155///    payload size under limit.
156///
157/// Comment lines (starting with `#`) and blank lines return `Ok(())`.
158///
159/// # Errors
160///
161/// Returns a [`ValidationError`] describing the first failure found.
162pub fn validate_event(line: &str, line_num: usize) -> Result<(), ValidationError> {
163    let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
164
165    // Skip comments and blanks
166    if trimmed.starts_with('#') || trimmed.trim().is_empty() {
167        return Ok(());
168    }
169
170    // Check payload size before full parse
171    let fields: Vec<&str> = trimmed.split('\t').collect();
172    if fields.len() != 8 {
173        return Err(ValidationError {
174            line_num,
175            kind: ValidationErrorKind::BadFieldCount,
176            message: format!("expected 8 tab-separated fields, found {}", fields.len()),
177            raw_line: Some(truncate_line(trimmed)),
178        });
179    }
180
181    // Check oversized payload (field index 6 = data JSON)
182    if fields[6].len() > MAX_PAYLOAD_BYTES {
183        return Err(ValidationError {
184            line_num,
185            kind: ValidationErrorKind::OversizedPayload,
186            message: format!(
187                "data payload is {} bytes, exceeds limit of {} bytes",
188                fields[6].len(),
189                MAX_PAYLOAD_BYTES
190            ),
191            raw_line: Some(truncate_line(trimmed)),
192        });
193    }
194
195    // Delegate to the parser for full validation (syntax + schema + hash)
196    match parser::parse_line(line) {
197        Ok(ParsedLine::Event(_) | ParsedLine::Comment(_) | ParsedLine::Blank) => Ok(()),
198        Err(ref parse_err) => Err(parse_error_to_validation(parse_err, line_num, trimmed)),
199    }
200}
201
202// ---------------------------------------------------------------------------
203// Shard-level validation
204// ---------------------------------------------------------------------------
205
206/// Validate an entire shard file.
207///
208/// Reads the file line-by-line, validates each event, detects truncation,
209/// and optionally checks the shard manifest for integrity.
210///
211/// Valid events before a corrupt line are preserved in the report's `passed`
212/// count. The validator does **not** stop at the first error — it continues
213/// through the entire file.
214///
215/// # Parameters
216///
217/// - `path`: Path to the `.events` shard file.
218/// - `manifest`: Optional [`ShardManifest`] for file-level integrity checks.
219///   If provided, the file's BLAKE3 hash, byte length, and event count are
220///   verified against the manifest.
221///
222/// # Panics
223///
224/// Does not panic. I/O errors produce a single `ValidationError` with
225/// `kind: InvalidUtf8` (for encoding errors) or a report with zero
226/// passed/failed (for missing files).
227#[must_use]
228pub fn validate_shard(path: &Path, manifest: Option<&ShardManifest>) -> ValidationReport {
229    let mut report = ValidationReport {
230        passed: 0,
231        failed: 0,
232        errors: Vec::new(),
233        shard_path: path.to_path_buf(),
234        truncated: false,
235    };
236
237    // Read file contents
238    let content_bytes = match fs::read(path) {
239        Ok(bytes) => bytes,
240        Err(e) => {
241            report.errors.push(ValidationError {
242                line_num: 0,
243                kind: ValidationErrorKind::InvalidUtf8,
244                message: format!("failed to read shard file: {e}"),
245                raw_line: None,
246            });
247            report.failed = 1;
248            return report;
249        }
250    };
251
252    // Manifest checks (file-level integrity)
253    if let Some(manifest) = manifest {
254        check_manifest(&content_bytes, manifest, &mut report);
255    }
256
257    // UTF-8 check
258    let content = match std::str::from_utf8(&content_bytes) {
259        Ok(s) => s,
260        Err(e) => {
261            report.errors.push(ValidationError {
262                line_num: 0,
263                kind: ValidationErrorKind::InvalidUtf8,
264                message: format!("shard file is not valid UTF-8: {e}"),
265                raw_line: None,
266            });
267            report.failed = 1;
268            return report;
269        }
270    };
271
272    // Truncation detection: file should be empty or end with '\n'
273    if !content.is_empty() && !content.ends_with('\n') {
274        report.truncated = true;
275        report.errors.push(ValidationError {
276            line_num: 0,
277            kind: ValidationErrorKind::TruncatedFile,
278            message: "shard file does not end with newline — possible truncation".into(),
279            raw_line: None,
280        });
281        // Continue validating lines that are complete
282    }
283
284    // Validate each line
285    for (i, line) in content.lines().enumerate() {
286        let line_num = i + 1; // 1-based
287
288        // Skip comment and blank lines for pass/fail counting
289        let trimmed = line.trim();
290        if trimmed.is_empty() || trimmed.starts_with('#') {
291            continue;
292        }
293
294        match validate_event(line, line_num) {
295            Ok(()) => report.passed += 1,
296            Err(err) => {
297                report.failed += 1;
298                report.errors.push(err);
299            }
300        }
301    }
302
303    report
304}
305
306/// Validate all shard files (`*.events`) in an events directory.
307///
308/// Reads manifests for each shard if available (`.manifest` files alongside
309/// `.events` files). Returns one [`ValidationReport`] per shard file,
310/// in chronological order.
311///
312/// Non-shard files and the `current.events` symlink are skipped.
313#[must_use]
314pub fn validate_all(events_dir: &Path) -> Vec<ValidationReport> {
315    let mut reports = Vec::new();
316
317    // Collect and sort shard files
318    let mut shard_files: Vec<PathBuf> = match fs::read_dir(events_dir) {
319        Ok(entries) => entries
320            .filter_map(std::result::Result::ok)
321            .map(|e| e.path())
322            .filter(|p| {
323                p.extension().and_then(|ext| ext.to_str()) == Some("events")
324                    && p.file_name()
325                        .and_then(|n| n.to_str())
326                        .is_some_and(|n| n != "current.events")
327            })
328            .collect(),
329        Err(_) => return reports,
330    };
331    shard_files.sort();
332
333    for shard_path in &shard_files {
334        // Try to load the corresponding manifest
335        let manifest = load_manifest_for(shard_path);
336        let report = validate_shard(shard_path, manifest.as_ref());
337        reports.push(report);
338    }
339
340    reports
341}
342
343// ---------------------------------------------------------------------------
344// Helpers
345// ---------------------------------------------------------------------------
346
347/// Convert a [`ParseError`] into a [`ValidationError`].
348fn parse_error_to_validation(err: &ParseError, line_num: usize, raw: &str) -> ValidationError {
349    let (kind, message) = match &err {
350        ParseError::FieldCount { found, expected } => (
351            ValidationErrorKind::BadFieldCount,
352            format!("expected {expected} tab-separated fields, found {found}"),
353        ),
354        ParseError::InvalidTimestamp(raw_ts) => (
355            ValidationErrorKind::MalformedTimestamp,
356            format!("invalid wall_ts_us (not i64): '{raw_ts}'"),
357        ),
358        ParseError::InvalidAgent(raw_agent) => (
359            ValidationErrorKind::InvalidAgent,
360            format!("invalid agent field: '{raw_agent}'"),
361        ),
362        ParseError::EmptyItc => (ValidationErrorKind::EmptyItc, "itc field is empty".into()),
363        ParseError::InvalidParentHash(raw_hash) => (
364            ValidationErrorKind::InvalidParentHash,
365            format!("invalid parent hash: '{raw_hash}'"),
366        ),
367        ParseError::InvalidEventType(raw_type) => (
368            ValidationErrorKind::UnknownEventType,
369            format!("unknown event type: '{raw_type}'"),
370        ),
371        ParseError::InvalidItemId(raw_id) => (
372            ValidationErrorKind::InvalidItemId,
373            format!("invalid item ID: '{raw_id}'"),
374        ),
375        ParseError::InvalidDataJson(details) => (
376            ValidationErrorKind::InvalidJson,
377            format!("invalid data JSON: {details}"),
378        ),
379        ParseError::DataSchemaMismatch {
380            event_type,
381            details,
382        } => (
383            ValidationErrorKind::InvalidJson,
384            format!("data schema mismatch for {event_type}: {details}"),
385        ),
386        ParseError::InvalidEventHash(raw_hash) => (
387            ValidationErrorKind::InvalidHashFormat,
388            format!("invalid event_hash format: '{raw_hash}'"),
389        ),
390        ParseError::HashMismatch { expected, computed } => (
391            ValidationErrorKind::HashChainBroken,
392            format!("event_hash mismatch: line has '{expected}', computed '{computed}'"),
393        ),
394        ParseError::VersionMismatch(msg) => (
395            ValidationErrorKind::UnsupportedVersion,
396            format!("unsupported event log version: {msg}"),
397        ),
398    };
399
400    ValidationError {
401        line_num,
402        kind,
403        message,
404        raw_line: Some(truncate_line(raw)),
405    }
406}
407
/// Shorten a line for inclusion in an error report.
///
/// Lines of at most 256 characters are returned unchanged. Longer lines are
/// cut to their first 256 characters and an ellipsis (`…`) is appended, so
/// the result is 257 characters long. Counting is per `char`, so multi-byte
/// characters are never split.
fn truncate_line(line: &str) -> String {
    const MAX_CHARS: usize = 256;

    // Find the byte offset of the 257th character, if there is one. This
    // stops after at most 257 characters instead of counting the whole line,
    // which matters for multi-megabyte oversized payload lines.
    match line.char_indices().nth(MAX_CHARS) {
        None => line.to_owned(),
        Some((cut, _)) => {
            let mut shortened = String::with_capacity(cut + '…'.len_utf8());
            shortened.push_str(&line[..cut]);
            shortened.push('…');
            shortened
        }
    }
}
418
419/// Check shard file against its manifest.
420fn check_manifest(content_bytes: &[u8], manifest: &ShardManifest, report: &mut ValidationReport) {
421    // Check byte length
422    let byte_len = content_bytes.len() as u64;
423    if byte_len != manifest.byte_len {
424        report.errors.push(ValidationError {
425            line_num: 0,
426            kind: ValidationErrorKind::ManifestSizeMismatch,
427            message: format!(
428                "shard byte length {} does not match manifest {}",
429                byte_len, manifest.byte_len
430            ),
431            raw_line: None,
432        });
433        report.failed += 1;
434    }
435
436    // Check file hash
437    let file_hash = format!("blake3:{}", blake3::hash(content_bytes).to_hex());
438    if file_hash != manifest.file_hash {
439        report.errors.push(ValidationError {
440            line_num: 0,
441            kind: ValidationErrorKind::ManifestMismatch,
442            message: format!(
443                "shard file hash '{}' does not match manifest '{}'",
444                file_hash, manifest.file_hash
445            ),
446            raw_line: None,
447        });
448        report.failed += 1;
449    }
450
451    // Check event count (deferred: done after line-by-line parsing)
452    // We store manifest event count for post-validation check in validate_shard
453    // but since validate_shard counts passed events, the caller can compare.
454}
455
456/// Try to load a `.manifest` file corresponding to a `.events` shard file.
457fn load_manifest_for(shard_path: &Path) -> Option<ShardManifest> {
458    let manifest_path = shard_path.with_extension("manifest");
459    if !manifest_path.exists() {
460        return None;
461    }
462    let content = fs::read_to_string(&manifest_path).ok()?;
463    ShardManifest::from_string_repr(&content)
464}
465
466// ---------------------------------------------------------------------------
467// Tests
468// ---------------------------------------------------------------------------
469
470#[cfg(test)]
471mod tests {
472    use super::*;
473    use crate::event::canonical::canonicalize_json;
474    use std::io::Write;
475    use tempfile::TempDir;
476
477    // -----------------------------------------------------------------------
478    // Test helpers
479    // -----------------------------------------------------------------------
480
481    /// Build a valid TSJSON line with correct event hash.
482    fn make_line(
483        wall_ts_us: i64,
484        agent: &str,
485        itc: &str,
486        parents: &str,
487        event_type: &str,
488        item_id: &str,
489        data_json: &str,
490    ) -> String {
491        let canonical_data = canonicalize_json(
492            &serde_json::from_str::<serde_json::Value>(data_json).expect("test JSON"),
493        );
494        let hash_input = format!(
495            "{wall_ts_us}\t{agent}\t{itc}\t{parents}\t{event_type}\t{item_id}\t{canonical_data}\n"
496        );
497        let hash = blake3::hash(hash_input.as_bytes());
498        let event_hash = crate::event::hash_text::encode_blake3_hash(&hash);
499        format!(
500            "{wall_ts_us}\t{agent}\t{itc}\t{parents}\t{event_type}\t{item_id}\t{canonical_data}\t{event_hash}"
501        )
502    }
503
504    fn sample_create_json() -> String {
505        canonicalize_json(&serde_json::json!({
506            "title": "Fix auth retry",
507            "kind": "task",
508            "size": "m",
509            "labels": ["backend"]
510        }))
511    }
512
513    fn sample_comment_json() -> String {
514        canonicalize_json(&serde_json::json!({
515            "body": "Root cause found"
516        }))
517    }
518
519    fn write_shard_file(dir: &Path, name: &str, content: &str) -> PathBuf {
520        let path = dir.join(name);
521        fs::write(&path, content).expect("write shard file");
522        path
523    }
524
525    fn write_manifest_file(dir: &Path, shard_name: &str, content_bytes: &[u8]) -> ShardManifest {
526        let content_str = std::str::from_utf8(content_bytes).unwrap();
527        let event_count = content_str
528            .lines()
529            .filter(|l| !l.is_empty() && !l.starts_with('#') && !l.trim().is_empty())
530            .count() as u64;
531        let manifest = ShardManifest {
532            shard_name: shard_name.to_string(),
533            event_count,
534            byte_len: content_bytes.len() as u64,
535            file_hash: format!("blake3:{}", blake3::hash(content_bytes).to_hex()),
536        };
537        let manifest_path = dir.join(shard_name.replace(".events", ".manifest"));
538        fs::write(&manifest_path, manifest.to_string_repr()).expect("write manifest");
539        manifest
540    }
541
542    // -----------------------------------------------------------------------
543    // validate_event — valid lines
544    // -----------------------------------------------------------------------
545
546    #[test]
547    fn validate_event_valid_create() {
548        let line = make_line(
549            1_708_012_200_123_456,
550            "claude-abc",
551            "itc:AQ",
552            "",
553            "item.create",
554            "bn-a7x",
555            &sample_create_json(),
556        );
557        assert!(validate_event(&line, 1).is_ok());
558    }
559
560    #[test]
561    fn validate_event_valid_with_parents() {
562        let parent = crate::event::hash_text::encode_blake3_hash(&blake3::hash(b"parent"));
563        let line = make_line(
564            1_000_000,
565            "agent",
566            "itc:AQ.1",
567            &parent,
568            "item.comment",
569            "bn-a7x",
570            &sample_comment_json(),
571        );
572        assert!(validate_event(&line, 1).is_ok());
573    }
574
575    #[test]
576    fn validate_event_comment_line() {
577        assert!(validate_event("# this is a comment", 1).is_ok());
578    }
579
580    #[test]
581    fn validate_event_blank_line() {
582        assert!(validate_event("", 1).is_ok());
583        assert!(validate_event("   ", 1).is_ok());
584    }
585
586    // -----------------------------------------------------------------------
587    // validate_event — invalid lines
588    // -----------------------------------------------------------------------
589
590    #[test]
591    fn validate_event_bad_field_count() {
592        let err = validate_event("too\tfew\tfields", 5).unwrap_err();
593        assert_eq!(err.kind, ValidationErrorKind::BadFieldCount);
594        assert_eq!(err.line_num, 5);
595    }
596
597    #[test]
598    fn validate_event_bad_timestamp() {
599        let line = "abc\tagent\titc:A\t\titem.create\tbn-a7x\t{\"title\":\"T\",\"kind\":\"task\"}\tblake3:aaa";
600        let err = validate_event(line, 3).unwrap_err();
601        assert_eq!(err.kind, ValidationErrorKind::MalformedTimestamp);
602    }
603
604    #[test]
605    fn validate_event_unknown_event_type() {
606        let line = "1000\tagent\titc:A\t\titem.unknown\tbn-a7x\t{}\tblake3:aaa";
607        let err = validate_event(line, 1).unwrap_err();
608        assert_eq!(err.kind, ValidationErrorKind::UnknownEventType);
609    }
610
611    #[test]
612    fn validate_event_invalid_item_id() {
613        let line = "1000\tagent\titc:A\t\titem.create\tnotvalid\t{\"title\":\"T\",\"kind\":\"task\"}\tblake3:aaa";
614        let err = validate_event(line, 1).unwrap_err();
615        assert_eq!(err.kind, ValidationErrorKind::InvalidItemId);
616    }
617
618    #[test]
619    fn validate_event_invalid_json() {
620        let line = "1000\tagent\titc:A\t\titem.create\tbn-a7x\t{not json}\tblake3:aaa";
621        let err = validate_event(line, 1).unwrap_err();
622        assert_eq!(err.kind, ValidationErrorKind::InvalidJson);
623    }
624
625    #[test]
626    fn validate_event_schema_mismatch() {
627        // Valid JSON but missing required "title" for create
628        let line = make_line(
629            1000,
630            "agent",
631            "itc:A",
632            "",
633            "item.create",
634            "bn-a7x",
635            r#"{"kind":"task"}"#,
636        );
637        let err = validate_event(&line, 1).unwrap_err();
638        assert_eq!(err.kind, ValidationErrorKind::InvalidJson);
639    }
640
641    #[test]
642    fn validate_event_hash_mismatch() {
643        let canonical = sample_comment_json();
644        let line = format!(
645            "1000\tagent\titc:A\t\titem.comment\tbn-a7x\t{}\tblake3:{}",
646            canonical,
647            "0".repeat(64)
648        );
649        let err = validate_event(&line, 1).unwrap_err();
650        assert_eq!(err.kind, ValidationErrorKind::HashChainBroken);
651    }
652
653    #[test]
654    fn validate_event_bad_hash_format() {
655        let line = "1000\tagent\titc:A\t\titem.comment\tbn-a7x\t{\"body\":\"hi\"}\tsha256:abc";
656        let err = validate_event(line, 1).unwrap_err();
657        assert_eq!(err.kind, ValidationErrorKind::InvalidHashFormat);
658    }
659
660    #[test]
661    fn validate_event_empty_agent() {
662        let line = "1000\t\titc:A\t\titem.comment\tbn-a7x\t{\"body\":\"hi\"}\tblake3:abc";
663        let err = validate_event(line, 1).unwrap_err();
664        assert_eq!(err.kind, ValidationErrorKind::InvalidAgent);
665    }
666
667    #[test]
668    fn validate_event_empty_itc() {
669        let line = "1000\tagent\t\t\titem.comment\tbn-a7x\t{\"body\":\"hi\"}\tblake3:abc";
670        let err = validate_event(line, 1).unwrap_err();
671        assert_eq!(err.kind, ValidationErrorKind::EmptyItc);
672    }
673
674    #[test]
675    fn validate_event_bad_parent_hash() {
676        let line =
677            "1000\tagent\titc:A\tnotahash\titem.comment\tbn-a7x\t{\"body\":\"hi\"}\tblake3:abc";
678        let err = validate_event(line, 1).unwrap_err();
679        assert_eq!(err.kind, ValidationErrorKind::InvalidParentHash);
680    }
681
682    #[test]
683    fn validate_event_oversized_payload() {
684        let big_payload = format!("{{\"body\":\"{}\"}}", "a".repeat(MAX_PAYLOAD_BYTES + 1));
685        let line = format!(
686            "1000\tagent\titc:A\t\titem.comment\tbn-a7x\t{}\tblake3:abc",
687            big_payload
688        );
689        let err = validate_event(&line, 1).unwrap_err();
690        assert_eq!(err.kind, ValidationErrorKind::OversizedPayload);
691    }
692
693    // -----------------------------------------------------------------------
694    // validate_shard
695    // -----------------------------------------------------------------------
696
697    #[test]
698    fn validate_shard_valid_file() {
699        let tmp = TempDir::new().expect("tmpdir");
700        let line1 = make_line(
701            1000,
702            "agent",
703            "itc:AQ",
704            "",
705            "item.comment",
706            "bn-a7x",
707            &sample_comment_json(),
708        );
709        let line2 = make_line(
710            2000,
711            "agent",
712            "itc:AQ.1",
713            "",
714            "item.comment",
715            "bn-a7x",
716            &sample_comment_json(),
717        );
718        let content = format!("# bones event log v1\n# fields: ...\n{line1}\n{line2}\n");
719        let path = write_shard_file(tmp.path(), "2026-01.events", &content);
720
721        let report = validate_shard(&path, None);
722        assert!(report.is_ok());
723        assert_eq!(report.passed, 2);
724        assert_eq!(report.failed, 0);
725        assert!(!report.truncated);
726    }
727
728    #[test]
729    fn validate_shard_with_errors_preserves_valid() {
730        let tmp = TempDir::new().expect("tmpdir");
731        let valid_line = make_line(
732            1000,
733            "agent",
734            "itc:AQ",
735            "",
736            "item.comment",
737            "bn-a7x",
738            &sample_comment_json(),
739        );
740        let content = format!("# header\n{valid_line}\nbad\tline\twith\twrong\tfield\tcount\n");
741        let path = write_shard_file(tmp.path(), "2026-01.events", &content);
742
743        let report = validate_shard(&path, None);
744        assert!(!report.is_ok());
745        assert_eq!(report.passed, 1); // valid line preserved
746        assert_eq!(report.failed, 1);
747        assert_eq!(report.errors.len(), 1);
748        assert_eq!(report.errors[0].kind, ValidationErrorKind::BadFieldCount);
749    }
750
751    #[test]
752    fn validate_shard_detects_truncation() {
753        let tmp = TempDir::new().expect("tmpdir");
754        let valid_line = make_line(
755            1000,
756            "agent",
757            "itc:AQ",
758            "",
759            "item.comment",
760            "bn-a7x",
761            &sample_comment_json(),
762        );
763        // No trailing newline after the last line
764        let content = format!("# header\n{valid_line}");
765        let path = write_shard_file(tmp.path(), "2026-01.events", &content);
766
767        let report = validate_shard(&path, None);
768        assert!(report.truncated);
769        assert!(
770            report
771                .errors
772                .iter()
773                .any(|e| e.kind == ValidationErrorKind::TruncatedFile)
774        );
775        // The valid event still parses (line ends are trimmed)
776        assert_eq!(report.passed, 1);
777    }
778
779    #[test]
780    fn validate_shard_missing_file() {
781        let report = validate_shard(Path::new("/nonexistent/2026-01.events"), None);
782        assert!(!report.is_ok());
783        assert_eq!(report.failed, 1);
784    }
785
786    #[test]
787    fn validate_shard_empty_file() {
788        let tmp = TempDir::new().expect("tmpdir");
789        let path = write_shard_file(tmp.path(), "2026-01.events", "");
790        let report = validate_shard(&path, None);
791        assert!(report.is_ok());
792        assert_eq!(report.passed, 0);
793        assert_eq!(report.failed, 0);
794    }
795
796    #[test]
797    fn validate_shard_only_comments() {
798        let tmp = TempDir::new().expect("tmpdir");
799        let content = "# bones event log v1\n# fields: ...\n";
800        let path = write_shard_file(tmp.path(), "2026-01.events", content);
801        let report = validate_shard(&path, None);
802        assert!(report.is_ok());
803        assert_eq!(report.passed, 0);
804        assert_eq!(report.failed, 0);
805    }
806
807    #[test]
808    fn validate_shard_invalid_utf8() {
809        let tmp = TempDir::new().expect("tmpdir");
810        let path = tmp.path().join("2026-01.events");
811        let mut file = fs::File::create(&path).expect("create");
812        file.write_all(&[0xFF, 0xFE, 0xFD]).expect("write");
813        drop(file);
814
815        let report = validate_shard(&path, None);
816        assert!(!report.is_ok());
817        assert!(
818            report
819                .errors
820                .iter()
821                .any(|e| e.kind == ValidationErrorKind::InvalidUtf8)
822        );
823    }
824
825    // -----------------------------------------------------------------------
826    // validate_shard with manifest
827    // -----------------------------------------------------------------------
828
829    #[test]
830    fn validate_shard_manifest_match() {
831        let tmp = TempDir::new().expect("tmpdir");
832        let line = make_line(
833            1000,
834            "agent",
835            "itc:AQ",
836            "",
837            "item.comment",
838            "bn-a7x",
839            &sample_comment_json(),
840        );
841        let content = format!("# header\n{line}\n");
842        let content_bytes = content.as_bytes();
843        let path = write_shard_file(tmp.path(), "2026-01.events", &content);
844        let manifest = write_manifest_file(tmp.path(), "2026-01.events", content_bytes);
845
846        let report = validate_shard(&path, Some(&manifest));
847        assert!(report.is_ok());
848    }
849
850    #[test]
851    fn validate_shard_manifest_hash_mismatch() {
852        let tmp = TempDir::new().expect("tmpdir");
853        let content = "# header\n";
854        let path = write_shard_file(tmp.path(), "2026-01.events", content);
855
856        let bad_manifest = ShardManifest {
857            shard_name: "2026-01.events".into(),
858            event_count: 0,
859            byte_len: content.len() as u64,
860            file_hash: "blake3:0000000000000000000000000000000000000000000000000000000000000000"
861                .into(),
862        };
863
864        let report = validate_shard(&path, Some(&bad_manifest));
865        assert!(!report.is_ok());
866        assert!(
867            report
868                .errors
869                .iter()
870                .any(|e| e.kind == ValidationErrorKind::ManifestMismatch)
871        );
872    }
873
874    #[test]
875    fn validate_shard_manifest_size_mismatch() {
876        let tmp = TempDir::new().expect("tmpdir");
877        let content = "# header\n";
878        let content_bytes = content.as_bytes();
879        let path = write_shard_file(tmp.path(), "2026-01.events", content);
880        let file_hash = format!("blake3:{}", blake3::hash(content_bytes).to_hex());
881
882        let bad_manifest = ShardManifest {
883            shard_name: "2026-01.events".into(),
884            event_count: 0,
885            byte_len: 999, // wrong
886            file_hash,
887        };
888
889        let report = validate_shard(&path, Some(&bad_manifest));
890        assert!(!report.is_ok());
891        assert!(
892            report
893                .errors
894                .iter()
895                .any(|e| e.kind == ValidationErrorKind::ManifestSizeMismatch)
896        );
897    }
898
899    // -----------------------------------------------------------------------
900    // validate_all
901    // -----------------------------------------------------------------------
902
903    #[test]
904    fn validate_all_multiple_shards() {
905        let tmp = TempDir::new().expect("tmpdir");
906        let line = make_line(
907            1000,
908            "agent",
909            "itc:AQ",
910            "",
911            "item.comment",
912            "bn-a7x",
913            &sample_comment_json(),
914        );
915
916        let content1 = format!("# header\n{line}\n");
917        let content2 = format!("# header\nbad line without tabs\n");
918
919        write_shard_file(tmp.path(), "2026-01.events", &content1);
920        write_shard_file(tmp.path(), "2026-02.events", &content2);
921
922        let reports = validate_all(tmp.path());
923        assert_eq!(reports.len(), 2);
924        assert!(reports[0].is_ok()); // first shard is valid
925        assert!(!reports[1].is_ok()); // second shard has error
926    }
927
928    #[test]
929    fn validate_all_empty_dir() {
930        let tmp = TempDir::new().expect("tmpdir");
931        let reports = validate_all(tmp.path());
932        assert!(reports.is_empty());
933    }
934
935    #[test]
936    fn validate_all_skips_non_shard_files() {
937        let tmp = TempDir::new().expect("tmpdir");
938        fs::write(tmp.path().join("readme.txt"), "hello").expect("write");
939        fs::write(tmp.path().join("2026-01.manifest"), "manifest").expect("write");
940
941        let reports = validate_all(tmp.path());
942        assert!(reports.is_empty());
943    }
944
945    #[test]
946    fn validate_all_loads_manifests() {
947        let tmp = TempDir::new().expect("tmpdir");
948        let line = make_line(
949            1000,
950            "agent",
951            "itc:AQ",
952            "",
953            "item.comment",
954            "bn-a7x",
955            &sample_comment_json(),
956        );
957        let content = format!("# header\n{line}\n");
958        write_shard_file(tmp.path(), "2026-01.events", &content);
959        write_manifest_file(tmp.path(), "2026-01.events", content.as_bytes());
960
961        let reports = validate_all(tmp.path());
962        assert_eq!(reports.len(), 1);
963        assert!(reports[0].is_ok());
964    }
965
966    #[test]
967    fn validate_all_nonexistent_dir() {
968        let reports = validate_all(Path::new("/nonexistent/events"));
969        assert!(reports.is_empty());
970    }
971
972    // -----------------------------------------------------------------------
973    // Multiple errors in one shard
974    // -----------------------------------------------------------------------
975
976    #[test]
977    fn validate_shard_multiple_errors() {
978        let tmp = TempDir::new().expect("tmpdir");
979        let valid_line = make_line(
980            1000,
981            "agent",
982            "itc:AQ",
983            "",
984            "item.comment",
985            "bn-a7x",
986            &sample_comment_json(),
987        );
988        let content = format!("# header\n{valid_line}\nbad1\nbad2\tbad\n{valid_line}\n");
989        let path = write_shard_file(tmp.path(), "2026-01.events", &content);
990
991        let report = validate_shard(&path, None);
992        assert_eq!(report.passed, 2); // two valid event lines
993        assert_eq!(report.failed, 2); // two bad lines
994        assert_eq!(report.errors.len(), 2);
995    }
996
997    // -----------------------------------------------------------------------
998    // truncate_line
999    // -----------------------------------------------------------------------
1000
1001    #[test]
1002    fn truncate_line_short() {
1003        assert_eq!(truncate_line("hello"), "hello");
1004    }
1005
1006    #[test]
1007    fn truncate_line_long() {
1008        let long = "a".repeat(300);
1009        let truncated = truncate_line(&long);
1010        assert!(truncated.len() < 300);
1011        assert!(truncated.ends_with('…'));
1012    }
1013
1014    #[test]
1015    fn truncate_line_unicode_does_not_split_codepoint() {
1016        let long = "é".repeat(300);
1017        let truncated = truncate_line(&long);
1018        assert_eq!(truncated.chars().count(), 257);
1019        assert!(truncated.ends_with('…'));
1020    }
1021
1022    // -----------------------------------------------------------------------
1023    // Edge cases
1024    // -----------------------------------------------------------------------
1025
1026    #[test]
1027    fn validate_event_no_panic_on_garbage() {
1028        let long_string = "a".repeat(10_000);
1029        let inputs: Vec<&str> = vec![
1030            "",
1031            "\t",
1032            "🎉🎉🎉",
1033            &long_string,
1034            "\t\t\t\t\t\t\t",
1035            "\t\t\t\t\t\t\t\t",
1036        ];
1037        for input in inputs {
1038            let _ = validate_event(input, 1); // must not panic
1039        }
1040    }
1041
1042    #[test]
1043    fn validation_report_total() {
1044        let report = ValidationReport {
1045            passed: 5,
1046            failed: 3,
1047            errors: Vec::new(),
1048            shard_path: PathBuf::from("test"),
1049            truncated: false,
1050        };
1051        assert_eq!(report.total(), 8);
1052    }
1053
1054    #[test]
1055    fn validation_report_is_ok_with_truncation() {
1056        let report = ValidationReport {
1057            passed: 5,
1058            failed: 0,
1059            errors: Vec::new(),
1060            shard_path: PathBuf::from("test"),
1061            truncated: true,
1062        };
1063        assert!(!report.is_ok()); // truncation makes it not OK
1064    }
1065}