Skip to main content

bones_core/event/
validate.rs

1//! Event format validation and hardening.
2//!
3//! Three validation levels:
4//!
5//! 1. **Line syntax** — TSJSON shape (8 tab-separated fields), valid UTF-8,
6//!    `event_hash` matches recomputed BLAKE3 hash of fields 1–7.
7//! 2. **Schema validation** — Typed payload deserialization per event type.
8//!    Unknown fields are preserved (forward compatible). Unknown event types
9//!    produce warnings, not errors.
10//! 3. **Semantic validation** — Enum constraint checks (kind, urgency, size,
11//!    state values), item ID format, link target format.
12//!
13//! # Shard-level checks
14//!
15//! - Verifies Merkle hash chains against shard manifests.
16//! - Detects truncated event files (incomplete trailing lines).
17//! - Preserves valid events before a corrupt line in the report.
18//!
19//! # Usage
20//!
21//! ```no_run
22//! use std::path::Path;
23//! use bones_core::event::validate::{validate_shard, validate_all};
24//!
25//! // Validate a single shard file
26//! let report = validate_shard(Path::new(".bones/events/2026-01.events"), None);
27//! println!("passed: {}, failed: {}", report.passed, report.failed);
28//! for err in &report.errors {
29//!     println!("  line {}: {:?} — {}", err.line_num, err.kind, err.message);
30//! }
31//!
32//! // Validate all shards in a directory
33//! let reports = validate_all(Path::new(".bones/events"));
34//! ```
35
36use std::fs;
37use std::path::{Path, PathBuf};
38
39use crate::event::parser::{self, ParseError, ParsedLine};
40use crate::shard::ShardManifest;
41
42// ---------------------------------------------------------------------------
43// Maximum payload size (1 MiB)
44// ---------------------------------------------------------------------------
45
/// Maximum allowed size (in bytes) for the JSON data field of a single event.
///
/// `validate_event` compares the byte length of the seventh tab-separated
/// field (index 6, the data JSON) against this limit before handing the line
/// to the parser; anything larger is flagged with
/// [`ValidationErrorKind::OversizedPayload`]. This prevents denial-of-service
/// through excessively large payloads in the event log.
pub const MAX_PAYLOAD_BYTES: usize = 1_048_576; // 1 MiB
52
53// ---------------------------------------------------------------------------
54// ValidationError
55// ---------------------------------------------------------------------------
56
/// Details about a single validation failure.
///
/// One value is produced per failing event line, and also per file-level
/// problem (unreadable file, non-UTF-8 content, truncation, manifest
/// mismatch).
#[derive(Debug, Clone)]
pub struct ValidationError {
    /// Line number in the shard file (1-based).
    ///
    /// File-level errors that are not tied to a specific line use `0`.
    pub line_num: usize,
    /// The category of validation failure.
    pub kind: ValidationErrorKind,
    /// Human-readable description of what went wrong.
    pub message: String,
    /// The raw line content, shortened to roughly 256 bytes (with a trailing
    /// `…`) when the line is longer. `None` for file-level errors.
    pub raw_line: Option<String>,
}
69
/// Category of validation failure.
///
/// Line-level variants are derived from the parser's `ParseError` values;
/// the `Manifest*`, `TruncatedFile` and `InvalidUtf8` variants describe
/// problems with the shard file as a whole.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationErrorKind {
    /// The `wall_ts_us` field is not a valid integer.
    MalformedTimestamp,
    /// The event type string is not a known `item.<verb>`.
    UnknownEventType,
    /// The `item_id` field is not a valid bones ID.
    InvalidItemId,
    /// The data field is not valid JSON or does not match the event type schema.
    InvalidJson,
    /// The JSON data payload exceeds [`MAX_PAYLOAD_BYTES`].
    OversizedPayload,
    /// A required field is missing from the event line.
    ///
    /// NOTE(review): no code in the visible part of this file produces this
    /// variant — confirm whether it is still in use.
    MissingField,
    /// The recomputed BLAKE3 hash does not match `event_hash`.
    HashChainBroken,
    /// The shard file appears truncated (incomplete trailing line).
    ///
    /// Reported when the file is non-empty and does not end with `\n`.
    TruncatedFile,
    /// The line is not valid UTF-8.
    ///
    /// `validate_shard` also uses this kind when the shard file cannot be
    /// read at all.
    InvalidUtf8,
    /// Wrong number of tab-separated fields.
    BadFieldCount,
    /// The `event_hash` field has an invalid format.
    InvalidHashFormat,
    /// The `agent` field is empty or invalid.
    InvalidAgent,
    /// The `itc` field is empty.
    EmptyItc,
    /// A parent hash has an invalid format.
    InvalidParentHash,
    /// Shard file BLAKE3 hash does not match manifest.
    ManifestMismatch,
    /// Shard file event count does not match manifest.
    ///
    /// NOTE(review): the manifest check visible in this file compares only
    /// byte length and file hash; nothing here emits this variant — confirm.
    ManifestCountMismatch,
    /// Shard file byte length does not match manifest.
    ManifestSizeMismatch,
    /// The shard was written by an unsupported (newer) version of bones.
    UnsupportedVersion,
}
110
111// ---------------------------------------------------------------------------
112// ValidationReport
113// ---------------------------------------------------------------------------
114
/// Summary report from validating an entire shard.
///
/// Built by `validate_shard`; comment and blank lines are not counted in
/// either `passed` or `failed`.
#[derive(Debug, Clone)]
pub struct ValidationReport {
    /// Number of event lines that passed validation.
    pub passed: usize,
    /// Number of lines that failed validation.
    ///
    /// File-level failures (unreadable file, non-UTF-8 content, manifest
    /// mismatches) are also counted here, so this can be non-zero even when
    /// no individual line failed.
    pub failed: usize,
    /// Detailed errors for each failure.
    pub errors: Vec<ValidationError>,
    /// Path of the shard file that was validated.
    pub shard_path: PathBuf,
    /// Whether the file appears truncated (no trailing newline on last line).
    pub truncated: bool,
}
129
130impl ValidationReport {
131    /// Returns `true` if the shard passed all validation checks.
132    #[must_use]
133    pub const fn is_ok(&self) -> bool {
134        self.failed == 0 && !self.truncated
135    }
136
137    /// Total lines processed (passed + failed).
138    #[must_use]
139    pub const fn total(&self) -> usize {
140        self.passed + self.failed
141    }
142}
143
144// ---------------------------------------------------------------------------
145// Line-level validation
146// ---------------------------------------------------------------------------
147
148/// Validate a single TSJSON event line.
149///
150/// Performs all three levels of validation:
151/// 1. **Syntax**: correct tab-separated field count, valid timestamp, valid
152///    event hash format and value.
153/// 2. **Schema**: valid JSON payload matching the event type.
154/// 3. **Semantic**: valid item ID, non-empty agent/itc, valid parent hashes,
155///    payload size under limit.
156///
157/// Comment lines (starting with `#`) and blank lines return `Ok(())`.
158///
159/// # Errors
160///
161/// Returns a [`ValidationError`] describing the first failure found.
162pub fn validate_event(line: &str, line_num: usize) -> Result<(), ValidationError> {
163    let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
164
165    // Skip comments and blanks
166    if trimmed.starts_with('#') || trimmed.trim().is_empty() {
167        return Ok(());
168    }
169
170    // Check payload size before full parse
171    let fields: Vec<&str> = trimmed.split('\t').collect();
172    if fields.len() != 8 {
173        return Err(ValidationError {
174            line_num,
175            kind: ValidationErrorKind::BadFieldCount,
176            message: format!("expected 8 tab-separated fields, found {}", fields.len()),
177            raw_line: Some(truncate_line(trimmed)),
178        });
179    }
180
181    // Check oversized payload (field index 6 = data JSON)
182    if fields[6].len() > MAX_PAYLOAD_BYTES {
183        return Err(ValidationError {
184            line_num,
185            kind: ValidationErrorKind::OversizedPayload,
186            message: format!(
187                "data payload is {} bytes, exceeds limit of {} bytes",
188                fields[6].len(),
189                MAX_PAYLOAD_BYTES
190            ),
191            raw_line: Some(truncate_line(trimmed)),
192        });
193    }
194
195    // Delegate to the parser for full validation (syntax + schema + hash)
196    match parser::parse_line(line) {
197        Ok(ParsedLine::Event(_) | ParsedLine::Comment(_) | ParsedLine::Blank) => Ok(()),
198        Err(ref parse_err) => Err(parse_error_to_validation(parse_err, line_num, trimmed)),
199    }
200}
201
202// ---------------------------------------------------------------------------
203// Shard-level validation
204// ---------------------------------------------------------------------------
205
206/// Validate an entire shard file.
207///
208/// Reads the file line-by-line, validates each event, detects truncation,
209/// and optionally checks the shard manifest for integrity.
210///
211/// Valid events before a corrupt line are preserved in the report's `passed`
212/// count. The validator does **not** stop at the first error — it continues
213/// through the entire file.
214///
215/// # Parameters
216///
217/// - `path`: Path to the `.events` shard file.
218/// - `manifest`: Optional [`ShardManifest`] for file-level integrity checks.
219///   If provided, the file's BLAKE3 hash, byte length, and event count are
220///   verified against the manifest.
221///
222/// # Panics
223///
224/// Does not panic. I/O errors produce a single `ValidationError` with
225/// `kind: InvalidUtf8` (for encoding errors) or a report with zero
226/// passed/failed (for missing files).
227#[must_use]
228pub fn validate_shard(path: &Path, manifest: Option<&ShardManifest>) -> ValidationReport {
229    let mut report = ValidationReport {
230        passed: 0,
231        failed: 0,
232        errors: Vec::new(),
233        shard_path: path.to_path_buf(),
234        truncated: false,
235    };
236
237    // Read file contents
238    let content_bytes = match fs::read(path) {
239        Ok(bytes) => bytes,
240        Err(e) => {
241            report.errors.push(ValidationError {
242                line_num: 0,
243                kind: ValidationErrorKind::InvalidUtf8,
244                message: format!("failed to read shard file: {e}"),
245                raw_line: None,
246            });
247            report.failed = 1;
248            return report;
249        }
250    };
251
252    // Manifest checks (file-level integrity)
253    if let Some(manifest) = manifest {
254        check_manifest(&content_bytes, manifest, &mut report);
255    }
256
257    // UTF-8 check
258    let content = match std::str::from_utf8(&content_bytes) {
259        Ok(s) => s,
260        Err(e) => {
261            report.errors.push(ValidationError {
262                line_num: 0,
263                kind: ValidationErrorKind::InvalidUtf8,
264                message: format!("shard file is not valid UTF-8: {e}"),
265                raw_line: None,
266            });
267            report.failed = 1;
268            return report;
269        }
270    };
271
272    // Truncation detection: file should be empty or end with '\n'
273    if !content.is_empty() && !content.ends_with('\n') {
274        report.truncated = true;
275        report.errors.push(ValidationError {
276            line_num: 0,
277            kind: ValidationErrorKind::TruncatedFile,
278            message: "shard file does not end with newline — possible truncation".into(),
279            raw_line: None,
280        });
281        // Continue validating lines that are complete
282    }
283
284    // Validate each line
285    for (i, line) in content.lines().enumerate() {
286        let line_num = i + 1; // 1-based
287
288        // Skip comment and blank lines for pass/fail counting
289        let trimmed = line.trim();
290        if trimmed.is_empty() || trimmed.starts_with('#') {
291            continue;
292        }
293
294        match validate_event(line, line_num) {
295            Ok(()) => report.passed += 1,
296            Err(err) => {
297                report.failed += 1;
298                report.errors.push(err);
299            }
300        }
301    }
302
303    report
304}
305
306/// Validate all shard files (`*.events`) in an events directory.
307///
308/// Reads manifests for each shard if available (`.manifest` files alongside
309/// `.events` files). Returns one [`ValidationReport`] per shard file,
310/// in chronological order.
311///
312/// Non-shard files and the `current.events` symlink are skipped.
313#[must_use]
314pub fn validate_all(events_dir: &Path) -> Vec<ValidationReport> {
315    let mut reports = Vec::new();
316
317    // Collect and sort shard files
318    let mut shard_files: Vec<PathBuf> = match fs::read_dir(events_dir) {
319        Ok(entries) => entries
320            .filter_map(std::result::Result::ok)
321            .map(|e| e.path())
322            .filter(|p| {
323                p.extension().and_then(|ext| ext.to_str()) == Some("events")
324                    && p.file_name()
325                        .and_then(|n| n.to_str())
326                        .is_some_and(|n| n != "current.events")
327            })
328            .collect(),
329        Err(_) => return reports,
330    };
331    shard_files.sort();
332
333    for shard_path in &shard_files {
334        // Try to load the corresponding manifest
335        let manifest = load_manifest_for(shard_path);
336        let report = validate_shard(shard_path, manifest.as_ref());
337        reports.push(report);
338    }
339
340    reports
341}
342
343// ---------------------------------------------------------------------------
344// Helpers
345// ---------------------------------------------------------------------------
346
347/// Convert a [`ParseError`] into a [`ValidationError`].
348fn parse_error_to_validation(err: &ParseError, line_num: usize, raw: &str) -> ValidationError {
349    let (kind, message) = match &err {
350        ParseError::FieldCount { found, expected } => (
351            ValidationErrorKind::BadFieldCount,
352            format!("expected {expected} tab-separated fields, found {found}"),
353        ),
354        ParseError::InvalidTimestamp(raw_ts) => (
355            ValidationErrorKind::MalformedTimestamp,
356            format!("invalid wall_ts_us (not i64): '{raw_ts}'"),
357        ),
358        ParseError::InvalidAgent(raw_agent) => (
359            ValidationErrorKind::InvalidAgent,
360            format!("invalid agent field: '{raw_agent}'"),
361        ),
362        ParseError::EmptyItc => (ValidationErrorKind::EmptyItc, "itc field is empty".into()),
363        ParseError::InvalidParentHash(raw_hash) => (
364            ValidationErrorKind::InvalidParentHash,
365            format!("invalid parent hash: '{raw_hash}'"),
366        ),
367        ParseError::InvalidEventType(raw_type) => (
368            ValidationErrorKind::UnknownEventType,
369            format!("unknown event type: '{raw_type}'"),
370        ),
371        ParseError::InvalidItemId(raw_id) => (
372            ValidationErrorKind::InvalidItemId,
373            format!("invalid item ID: '{raw_id}'"),
374        ),
375        ParseError::InvalidDataJson(details) => (
376            ValidationErrorKind::InvalidJson,
377            format!("invalid data JSON: {details}"),
378        ),
379        ParseError::DataSchemaMismatch {
380            event_type,
381            details,
382        } => (
383            ValidationErrorKind::InvalidJson,
384            format!("data schema mismatch for {event_type}: {details}"),
385        ),
386        ParseError::InvalidEventHash(raw_hash) => (
387            ValidationErrorKind::InvalidHashFormat,
388            format!("invalid event_hash format: '{raw_hash}'"),
389        ),
390        ParseError::HashMismatch { expected, computed } => (
391            ValidationErrorKind::HashChainBroken,
392            format!("event_hash mismatch: line has '{expected}', computed '{computed}'"),
393        ),
394        ParseError::VersionMismatch(msg) => (
395            ValidationErrorKind::UnsupportedVersion,
396            format!("unsupported event log version: {msg}"),
397        ),
398    };
399
400    ValidationError {
401        line_num,
402        kind,
403        message,
404        raw_line: Some(truncate_line(raw)),
405    }
406}
407
/// Truncate a line to at most 256 bytes for inclusion in error reports.
///
/// Lines of 256 bytes or fewer are returned unchanged. Longer lines are cut
/// at the last UTF-8 character boundary at or before byte 256 and an
/// ellipsis (`…`) is appended, so the function never splits a multi-byte
/// character (slicing at a fixed byte offset would panic in that case).
fn truncate_line(line: &str) -> String {
    const MAX_BYTES: usize = 256;

    if line.len() <= MAX_BYTES {
        return line.to_string();
    }

    // Walk back to a character boundary; index 0 is always a boundary, so
    // this loop terminates.
    let mut cut = MAX_BYTES;
    while !line.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}…", &line[..cut])
}
416
417/// Check shard file against its manifest.
418fn check_manifest(content_bytes: &[u8], manifest: &ShardManifest, report: &mut ValidationReport) {
419    // Check byte length
420    let byte_len = content_bytes.len() as u64;
421    if byte_len != manifest.byte_len {
422        report.errors.push(ValidationError {
423            line_num: 0,
424            kind: ValidationErrorKind::ManifestSizeMismatch,
425            message: format!(
426                "shard byte length {} does not match manifest {}",
427                byte_len, manifest.byte_len
428            ),
429            raw_line: None,
430        });
431        report.failed += 1;
432    }
433
434    // Check file hash
435    let file_hash = format!("blake3:{}", blake3::hash(content_bytes).to_hex());
436    if file_hash != manifest.file_hash {
437        report.errors.push(ValidationError {
438            line_num: 0,
439            kind: ValidationErrorKind::ManifestMismatch,
440            message: format!(
441                "shard file hash '{}' does not match manifest '{}'",
442                file_hash, manifest.file_hash
443            ),
444            raw_line: None,
445        });
446        report.failed += 1;
447    }
448
449    // Check event count (deferred: done after line-by-line parsing)
450    // We store manifest event count for post-validation check in validate_shard
451    // but since validate_shard counts passed events, the caller can compare.
452}
453
454/// Try to load a `.manifest` file corresponding to a `.events` shard file.
455fn load_manifest_for(shard_path: &Path) -> Option<ShardManifest> {
456    let manifest_path = shard_path.with_extension("manifest");
457    if !manifest_path.exists() {
458        return None;
459    }
460    let content = fs::read_to_string(&manifest_path).ok()?;
461    ShardManifest::from_string_repr(&content)
462}
463
464// ---------------------------------------------------------------------------
465// Tests
466// ---------------------------------------------------------------------------
467
468#[cfg(test)]
469mod tests {
470    use super::*;
471    use crate::event::canonical::canonicalize_json;
472    use std::io::Write;
473    use tempfile::TempDir;
474
475    // -----------------------------------------------------------------------
476    // Test helpers
477    // -----------------------------------------------------------------------
478
479    /// Build a valid TSJSON line with correct event hash.
480    fn make_line(
481        wall_ts_us: i64,
482        agent: &str,
483        itc: &str,
484        parents: &str,
485        event_type: &str,
486        item_id: &str,
487        data_json: &str,
488    ) -> String {
489        let canonical_data = canonicalize_json(
490            &serde_json::from_str::<serde_json::Value>(data_json).expect("test JSON"),
491        );
492        let hash_input = format!(
493            "{wall_ts_us}\t{agent}\t{itc}\t{parents}\t{event_type}\t{item_id}\t{canonical_data}\n"
494        );
495        let hash = blake3::hash(hash_input.as_bytes());
496        let event_hash = crate::event::hash_text::encode_blake3_hash(&hash);
497        format!(
498            "{wall_ts_us}\t{agent}\t{itc}\t{parents}\t{event_type}\t{item_id}\t{canonical_data}\t{event_hash}"
499        )
500    }
501
502    fn sample_create_json() -> String {
503        canonicalize_json(&serde_json::json!({
504            "title": "Fix auth retry",
505            "kind": "task",
506            "size": "m",
507            "labels": ["backend"]
508        }))
509    }
510
511    fn sample_comment_json() -> String {
512        canonicalize_json(&serde_json::json!({
513            "body": "Root cause found"
514        }))
515    }
516
517    fn write_shard_file(dir: &Path, name: &str, content: &str) -> PathBuf {
518        let path = dir.join(name);
519        fs::write(&path, content).expect("write shard file");
520        path
521    }
522
523    fn write_manifest_file(dir: &Path, shard_name: &str, content_bytes: &[u8]) -> ShardManifest {
524        let content_str = std::str::from_utf8(content_bytes).unwrap();
525        let event_count = content_str
526            .lines()
527            .filter(|l| !l.is_empty() && !l.starts_with('#') && !l.trim().is_empty())
528            .count() as u64;
529        let manifest = ShardManifest {
530            shard_name: shard_name.to_string(),
531            event_count,
532            byte_len: content_bytes.len() as u64,
533            file_hash: format!("blake3:{}", blake3::hash(content_bytes).to_hex()),
534        };
535        let manifest_path = dir.join(shard_name.replace(".events", ".manifest"));
536        fs::write(&manifest_path, manifest.to_string_repr()).expect("write manifest");
537        manifest
538    }
539
540    // -----------------------------------------------------------------------
541    // validate_event — valid lines
542    // -----------------------------------------------------------------------
543
544    #[test]
545    fn validate_event_valid_create() {
546        let line = make_line(
547            1_708_012_200_123_456,
548            "claude-abc",
549            "itc:AQ",
550            "",
551            "item.create",
552            "bn-a7x",
553            &sample_create_json(),
554        );
555        assert!(validate_event(&line, 1).is_ok());
556    }
557
558    #[test]
559    fn validate_event_valid_with_parents() {
560        let parent = crate::event::hash_text::encode_blake3_hash(&blake3::hash(b"parent"));
561        let line = make_line(
562            1_000_000,
563            "agent",
564            "itc:AQ.1",
565            &parent,
566            "item.comment",
567            "bn-a7x",
568            &sample_comment_json(),
569        );
570        assert!(validate_event(&line, 1).is_ok());
571    }
572
573    #[test]
574    fn validate_event_comment_line() {
575        assert!(validate_event("# this is a comment", 1).is_ok());
576    }
577
578    #[test]
579    fn validate_event_blank_line() {
580        assert!(validate_event("", 1).is_ok());
581        assert!(validate_event("   ", 1).is_ok());
582    }
583
584    // -----------------------------------------------------------------------
585    // validate_event — invalid lines
586    // -----------------------------------------------------------------------
587
588    #[test]
589    fn validate_event_bad_field_count() {
590        let err = validate_event("too\tfew\tfields", 5).unwrap_err();
591        assert_eq!(err.kind, ValidationErrorKind::BadFieldCount);
592        assert_eq!(err.line_num, 5);
593    }
594
595    #[test]
596    fn validate_event_bad_timestamp() {
597        let line = "abc\tagent\titc:A\t\titem.create\tbn-a7x\t{\"title\":\"T\",\"kind\":\"task\"}\tblake3:aaa";
598        let err = validate_event(line, 3).unwrap_err();
599        assert_eq!(err.kind, ValidationErrorKind::MalformedTimestamp);
600    }
601
602    #[test]
603    fn validate_event_unknown_event_type() {
604        let line = "1000\tagent\titc:A\t\titem.unknown\tbn-a7x\t{}\tblake3:aaa";
605        let err = validate_event(line, 1).unwrap_err();
606        assert_eq!(err.kind, ValidationErrorKind::UnknownEventType);
607    }
608
609    #[test]
610    fn validate_event_invalid_item_id() {
611        let line = "1000\tagent\titc:A\t\titem.create\tnotvalid\t{\"title\":\"T\",\"kind\":\"task\"}\tblake3:aaa";
612        let err = validate_event(line, 1).unwrap_err();
613        assert_eq!(err.kind, ValidationErrorKind::InvalidItemId);
614    }
615
616    #[test]
617    fn validate_event_invalid_json() {
618        let line = "1000\tagent\titc:A\t\titem.create\tbn-a7x\t{not json}\tblake3:aaa";
619        let err = validate_event(line, 1).unwrap_err();
620        assert_eq!(err.kind, ValidationErrorKind::InvalidJson);
621    }
622
623    #[test]
624    fn validate_event_schema_mismatch() {
625        // Valid JSON but missing required "title" for create
626        let line = make_line(
627            1000,
628            "agent",
629            "itc:A",
630            "",
631            "item.create",
632            "bn-a7x",
633            r#"{"kind":"task"}"#,
634        );
635        let err = validate_event(&line, 1).unwrap_err();
636        assert_eq!(err.kind, ValidationErrorKind::InvalidJson);
637    }
638
639    #[test]
640    fn validate_event_hash_mismatch() {
641        let canonical = sample_comment_json();
642        let line = format!(
643            "1000\tagent\titc:A\t\titem.comment\tbn-a7x\t{}\tblake3:{}",
644            canonical,
645            "0".repeat(64)
646        );
647        let err = validate_event(&line, 1).unwrap_err();
648        assert_eq!(err.kind, ValidationErrorKind::HashChainBroken);
649    }
650
651    #[test]
652    fn validate_event_bad_hash_format() {
653        let line = "1000\tagent\titc:A\t\titem.comment\tbn-a7x\t{\"body\":\"hi\"}\tsha256:abc";
654        let err = validate_event(line, 1).unwrap_err();
655        assert_eq!(err.kind, ValidationErrorKind::InvalidHashFormat);
656    }
657
658    #[test]
659    fn validate_event_empty_agent() {
660        let line = "1000\t\titc:A\t\titem.comment\tbn-a7x\t{\"body\":\"hi\"}\tblake3:abc";
661        let err = validate_event(line, 1).unwrap_err();
662        assert_eq!(err.kind, ValidationErrorKind::InvalidAgent);
663    }
664
665    #[test]
666    fn validate_event_empty_itc() {
667        let line = "1000\tagent\t\t\titem.comment\tbn-a7x\t{\"body\":\"hi\"}\tblake3:abc";
668        let err = validate_event(line, 1).unwrap_err();
669        assert_eq!(err.kind, ValidationErrorKind::EmptyItc);
670    }
671
672    #[test]
673    fn validate_event_bad_parent_hash() {
674        let line =
675            "1000\tagent\titc:A\tnotahash\titem.comment\tbn-a7x\t{\"body\":\"hi\"}\tblake3:abc";
676        let err = validate_event(line, 1).unwrap_err();
677        assert_eq!(err.kind, ValidationErrorKind::InvalidParentHash);
678    }
679
680    #[test]
681    fn validate_event_oversized_payload() {
682        let big_payload = format!("{{\"body\":\"{}\"}}", "a".repeat(MAX_PAYLOAD_BYTES + 1));
683        let line = format!(
684            "1000\tagent\titc:A\t\titem.comment\tbn-a7x\t{}\tblake3:abc",
685            big_payload
686        );
687        let err = validate_event(&line, 1).unwrap_err();
688        assert_eq!(err.kind, ValidationErrorKind::OversizedPayload);
689    }
690
691    // -----------------------------------------------------------------------
692    // validate_shard
693    // -----------------------------------------------------------------------
694
695    #[test]
696    fn validate_shard_valid_file() {
697        let tmp = TempDir::new().expect("tmpdir");
698        let line1 = make_line(
699            1000,
700            "agent",
701            "itc:AQ",
702            "",
703            "item.comment",
704            "bn-a7x",
705            &sample_comment_json(),
706        );
707        let line2 = make_line(
708            2000,
709            "agent",
710            "itc:AQ.1",
711            "",
712            "item.comment",
713            "bn-a7x",
714            &sample_comment_json(),
715        );
716        let content = format!("# bones event log v1\n# fields: ...\n{line1}\n{line2}\n");
717        let path = write_shard_file(tmp.path(), "2026-01.events", &content);
718
719        let report = validate_shard(&path, None);
720        assert!(report.is_ok());
721        assert_eq!(report.passed, 2);
722        assert_eq!(report.failed, 0);
723        assert!(!report.truncated);
724    }
725
726    #[test]
727    fn validate_shard_with_errors_preserves_valid() {
728        let tmp = TempDir::new().expect("tmpdir");
729        let valid_line = make_line(
730            1000,
731            "agent",
732            "itc:AQ",
733            "",
734            "item.comment",
735            "bn-a7x",
736            &sample_comment_json(),
737        );
738        let content = format!("# header\n{valid_line}\nbad\tline\twith\twrong\tfield\tcount\n");
739        let path = write_shard_file(tmp.path(), "2026-01.events", &content);
740
741        let report = validate_shard(&path, None);
742        assert!(!report.is_ok());
743        assert_eq!(report.passed, 1); // valid line preserved
744        assert_eq!(report.failed, 1);
745        assert_eq!(report.errors.len(), 1);
746        assert_eq!(report.errors[0].kind, ValidationErrorKind::BadFieldCount);
747    }
748
749    #[test]
750    fn validate_shard_detects_truncation() {
751        let tmp = TempDir::new().expect("tmpdir");
752        let valid_line = make_line(
753            1000,
754            "agent",
755            "itc:AQ",
756            "",
757            "item.comment",
758            "bn-a7x",
759            &sample_comment_json(),
760        );
761        // No trailing newline after the last line
762        let content = format!("# header\n{valid_line}");
763        let path = write_shard_file(tmp.path(), "2026-01.events", &content);
764
765        let report = validate_shard(&path, None);
766        assert!(report.truncated);
767        assert!(
768            report
769                .errors
770                .iter()
771                .any(|e| e.kind == ValidationErrorKind::TruncatedFile)
772        );
773        // The valid event still parses (line ends are trimmed)
774        assert_eq!(report.passed, 1);
775    }
776
777    #[test]
778    fn validate_shard_missing_file() {
779        let report = validate_shard(Path::new("/nonexistent/2026-01.events"), None);
780        assert!(!report.is_ok());
781        assert_eq!(report.failed, 1);
782    }
783
784    #[test]
785    fn validate_shard_empty_file() {
786        let tmp = TempDir::new().expect("tmpdir");
787        let path = write_shard_file(tmp.path(), "2026-01.events", "");
788        let report = validate_shard(&path, None);
789        assert!(report.is_ok());
790        assert_eq!(report.passed, 0);
791        assert_eq!(report.failed, 0);
792    }
793
794    #[test]
795    fn validate_shard_only_comments() {
796        let tmp = TempDir::new().expect("tmpdir");
797        let content = "# bones event log v1\n# fields: ...\n";
798        let path = write_shard_file(tmp.path(), "2026-01.events", content);
799        let report = validate_shard(&path, None);
800        assert!(report.is_ok());
801        assert_eq!(report.passed, 0);
802        assert_eq!(report.failed, 0);
803    }
804
805    #[test]
806    fn validate_shard_invalid_utf8() {
807        let tmp = TempDir::new().expect("tmpdir");
808        let path = tmp.path().join("2026-01.events");
809        let mut file = fs::File::create(&path).expect("create");
810        file.write_all(&[0xFF, 0xFE, 0xFD]).expect("write");
811        drop(file);
812
813        let report = validate_shard(&path, None);
814        assert!(!report.is_ok());
815        assert!(
816            report
817                .errors
818                .iter()
819                .any(|e| e.kind == ValidationErrorKind::InvalidUtf8)
820        );
821    }
822
823    // -----------------------------------------------------------------------
824    // validate_shard with manifest
825    // -----------------------------------------------------------------------
826
827    #[test]
828    fn validate_shard_manifest_match() {
829        let tmp = TempDir::new().expect("tmpdir");
830        let line = make_line(
831            1000,
832            "agent",
833            "itc:AQ",
834            "",
835            "item.comment",
836            "bn-a7x",
837            &sample_comment_json(),
838        );
839        let content = format!("# header\n{line}\n");
840        let content_bytes = content.as_bytes();
841        let path = write_shard_file(tmp.path(), "2026-01.events", &content);
842        let manifest = write_manifest_file(tmp.path(), "2026-01.events", content_bytes);
843
844        let report = validate_shard(&path, Some(&manifest));
845        assert!(report.is_ok());
846    }
847
/// A manifest carrying an all-zero hash (but the correct byte length) must
/// make validation fail with `ManifestMismatch`.
#[test]
fn validate_shard_manifest_hash_mismatch() {
    const SHARD_TEXT: &str = "# header\n";
    const BOGUS_HASH: &str =
        "blake3:0000000000000000000000000000000000000000000000000000000000000000";

    let scratch = TempDir::new().expect("tmpdir");
    let shard_path = write_shard_file(scratch.path(), "2026-01.events", SHARD_TEXT);

    // The length is taken from the real content; only the hash is wrong.
    let stale_manifest = ShardManifest {
        file_hash: BOGUS_HASH.into(),
        byte_len: SHARD_TEXT.len() as u64,
        event_count: 0,
        shard_name: "2026-01.events".into(),
    };

    let report = validate_shard(&shard_path, Some(&stale_manifest));
    assert!(!report.is_ok());

    let flagged = report
        .errors
        .iter()
        .any(|err| err.kind == ValidationErrorKind::ManifestMismatch);
    assert!(flagged);
}
871
/// A manifest with a genuine content hash but a wrong byte length must make
/// validation fail with `ManifestSizeMismatch`.
#[test]
fn validate_shard_manifest_size_mismatch() {
    let scratch = TempDir::new().expect("tmpdir");
    let shard_text = "# header\n";
    let shard_path = write_shard_file(scratch.path(), "2026-01.events", shard_text);

    // Hash the real bytes so that only the recorded length disagrees.
    let genuine_hash = blake3::hash(shard_text.as_bytes()).to_hex();
    let wrong_len_manifest = ShardManifest {
        shard_name: "2026-01.events".into(),
        event_count: 0,
        byte_len: 999, // deliberately wrong: the content is 9 bytes long
        file_hash: format!("blake3:{genuine_hash}"),
    };

    let report = validate_shard(&shard_path, Some(&wrong_len_manifest));
    assert!(!report.is_ok());

    let flagged = report
        .errors
        .iter()
        .any(|err| err.kind == ValidationErrorKind::ManifestSizeMismatch);
    assert!(flagged);
}
896
897    // -----------------------------------------------------------------------
898    // validate_all
899    // -----------------------------------------------------------------------
900
/// `validate_all` produces one report per shard file in the directory: a
/// well-formed shard passes and a shard with a malformed line fails.
#[test]
fn validate_all_multiple_shards() {
    let tmp = TempDir::new().expect("tmpdir");
    let line = make_line(
        1000,
        "agent",
        "itc:AQ",
        "",
        "item.comment",
        "bn-a7x",
        &sample_comment_json(),
    );

    let content1 = format!("# header\n{line}\n");
    // Nothing is interpolated here, so a plain literal replaces the
    // allocation-only `format!` call (clippy: `useless_format`).
    let content2 = "# header\nbad line without tabs\n";

    write_shard_file(tmp.path(), "2026-01.events", &content1);
    write_shard_file(tmp.path(), "2026-02.events", content2);

    let reports = validate_all(tmp.path());
    assert_eq!(reports.len(), 2);
    // NOTE(review): the index-based checks assume `validate_all` returns
    // reports ordered by shard name (2026-01 before 2026-02) — confirm.
    assert!(reports[0].is_ok()); // first shard is valid
    assert!(!reports[1].is_ok()); // second shard has error
}
925
/// Validating a directory that contains no files yields no reports.
#[test]
fn validate_all_empty_dir() {
    let scratch = TempDir::new().expect("tmpdir");
    let found = validate_all(scratch.path());
    assert!(found.is_empty());
}
932
/// Files that are not `.events` shards (a text file, a manifest) produce
/// no reports.
#[test]
fn validate_all_skips_non_shard_files() {
    let scratch = TempDir::new().expect("tmpdir");

    // (file name, contents) for each non-shard file dropped into the dir.
    let bystanders = [("readme.txt", "hello"), ("2026-01.manifest", "manifest")];
    for (file_name, body) in bystanders {
        fs::write(scratch.path().join(file_name), body).expect("write");
    }

    assert!(validate_all(scratch.path()).is_empty());
}
942
/// With a shard and a manifest written from the same bytes side by side,
/// `validate_all` yields a single passing report.
#[test]
fn validate_all_loads_manifests() {
    let scratch = TempDir::new().expect("tmpdir");
    let payload = sample_comment_json();
    let event_line = make_line(
        1000,
        "agent",
        "itc:AQ",
        "",
        "item.comment",
        "bn-a7x",
        &payload,
    );
    let shard_text = format!("# header\n{event_line}\n");

    // Both files live in the same directory and describe the same content.
    write_shard_file(scratch.path(), "2026-01.events", &shard_text);
    write_manifest_file(scratch.path(), "2026-01.events", shard_text.as_bytes());

    let found = validate_all(scratch.path());
    assert_eq!(found.len(), 1);
    assert!(found[0].is_ok());
}
963
/// Validating a directory that does not exist yields no reports.
///
/// The missing path is built underneath a freshly created temp dir, so the
/// test does not depend on what happens to exist at an absolute path on the
/// host filesystem.
#[test]
fn validate_all_nonexistent_dir() {
    let tmp = TempDir::new().expect("tmpdir");
    // Never created: a fresh temp dir has no children.
    let missing = tmp.path().join("nonexistent").join("events");

    let reports = validate_all(&missing);
    assert!(reports.is_empty());
}
969
970    // -----------------------------------------------------------------------
971    // Multiple errors in one shard
972    // -----------------------------------------------------------------------
973
/// A shard mixing two valid event lines with two malformed ones reports
/// both groups, with one error entry per malformed line.
#[test]
fn validate_shard_multiple_errors() {
    let scratch = TempDir::new().expect("tmpdir");
    let payload = sample_comment_json();
    let good = make_line(
        1000,
        "agent",
        "itc:AQ",
        "",
        "item.comment",
        "bn-a7x",
        &payload,
    );
    // Layout: header, good, bad, bad, good.
    let shard_text = format!("# header\n{good}\nbad1\nbad2\tbad\n{good}\n");
    let shard_path = write_shard_file(scratch.path(), "2026-01.events", &shard_text);

    let report = validate_shard(&shard_path, None);
    let ValidationReport {
        passed,
        failed,
        errors,
        ..
    } = &report;

    assert_eq!(*passed, 2); // the two valid event lines
    assert_eq!(*failed, 2); // the two malformed lines
    assert_eq!(errors.len(), 2);
}
994
995    // -----------------------------------------------------------------------
996    // truncate_line
997    // -----------------------------------------------------------------------
998
/// Short input comes back from `truncate_line` unchanged.
#[test]
fn truncate_line_short() {
    let input = "hello";
    assert_eq!(truncate_line(input), input);
}
1003
/// A 300-byte input is shortened and ends with an ellipsis.
#[test]
fn truncate_line_long() {
    let oversized = "a".repeat(300);
    let shortened = truncate_line(&oversized);

    assert!(oversized.len() > shortened.len());
    assert!(shortened.ends_with('…'));
}
1011
1012    // -----------------------------------------------------------------------
1013    // Edge cases
1014    // -----------------------------------------------------------------------
1015
/// `validate_event` must never panic, whatever junk it is handed; the
/// results themselves are deliberately ignored.
#[test]
fn validate_event_no_panic_on_garbage() {
    let long_string = "a".repeat(10_000);
    let garbage = [
        "",                         // empty line
        "\t",                       // a lone separator
        "🎉🎉🎉",                   // multi-byte characters, no tabs
        long_string.as_str(),       // very long single field
        "\t\t\t\t\t\t\t",           // seven tabs: eight empty fields
        "\t\t\t\t\t\t\t\t",         // eight tabs: one field too many
    ];

    for candidate in garbage {
        let _ = validate_event(candidate, 1); // must not panic
    }
}
1031
/// `total()` on a report with 5 passed and 3 failed events is 8.
#[test]
fn validation_report_total() {
    let sample = ValidationReport {
        shard_path: PathBuf::from("test"),
        truncated: false,
        errors: Vec::new(),
        failed: 3,
        passed: 5,
    };

    let total = sample.total();
    assert_eq!(total, 8);
}
1043
/// A report with no failures and no errors is still not OK once it is
/// marked as truncated.
#[test]
fn validation_report_is_ok_with_truncation() {
    let truncated_report = ValidationReport {
        shard_path: PathBuf::from("test"),
        truncated: true,
        errors: Vec::new(),
        failed: 0,
        passed: 5,
    };

    // Truncation alone is enough to make the report not OK.
    let healthy = truncated_report.is_ok();
    assert!(!healthy);
}
1055}