dar_forensic/lib.rs
1//! Forensic-grade reader and anomaly auditor for Denis Corbin DAR (Disk
2//! ARchiver) archives.
3//!
4//! Built on the pure-Rust [`dar`] parser core: this crate re-exports the
5//! reader API and adds the forensic layer — catalogue anomaly detection
6//! ([`DarAudit::audit`]) and Sleuth Kit bodyfile export
7//! ([`DarBodyfile::bodyfile`] / [`DarAudit::write_bodyfile`]) — graded onto the
8//! shared [`forensicnomicon::report`] model.
9//!
10//! Every anomaly is an *observation*, never an assertion of intent: the notes
11//! say "consistent with …", and the examiner draws the conclusions.
12//!
13//! ```no_run
14//! use dar_forensic::{DarReader, DarAudit};
15//! use std::io::Cursor;
16//!
17//! # fn demo(bytes: Vec<u8>) -> Result<(), dar_forensic::DarError> {
18//! let reader = DarReader::open(Cursor::new(bytes))?;
19//! for anomaly in reader.audit() {
20//! println!("{anomaly}");
21//! }
22//! # Ok(())
23//! # }
24//! ```
25
26// Production code is panic-free (no unwrap/expect, enforced by the workspace
27// lints); tests legitimately use them.
28#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))]
29
30mod bodyfile;
31mod findings;
32
33use std::io::{Read, Seek, Write};
34
35// Re-export the full parser API so `dar_forensic::*` resolves exactly as the
36// single-crate library did (DarReader, SliceReader, DarEntry, EntryKind,
37// CrcStatus, DarError, and the entry-projection helpers).
38pub use dar::{CrcStatus, DarEntry, DarError, DarReader, EntryKind, SliceReader};
39
40pub use findings::{Anomaly, AnomalyKind, Severity};
41
/// Fixed "far future" ceiling: 2100-01-01T00:00:00Z expressed as Unix epoch
/// seconds (`4_102_444_800`).
///
/// [`DarAudit::audit`] reports an entry timestamp strictly greater than this
/// value as implausibly far in the future (consistent with a clock error or
/// tampering). The bound is a constant rather than a comparison against the
/// wall clock, so the same catalogue always produces the same findings.
///
/// Written as days × seconds-per-day: 47 482 days separate 1970-01-01 from
/// 2100-01-01 (130 years, 32 of them leap years).
const FAR_FUTURE_EPOCH_SECS: i64 = 47_482 * 86_400;
46
/// Renders a parsed [`DarEntry`] in Sleuth Kit
/// [`bodyfile`](https://wiki.sleuthkit.org/index.php?title=Body_file) form.
pub trait DarBodyfile {
    /// Format this entry as a single Sleuth Kit bodyfile line, without a
    /// trailing newline. Bodyfile lines are what TSK's `mactime` timeline tool
    /// consumes.
    ///
    /// The line carries these `|`-separated fields, in order:
    /// `MD5|name|inode|mode|UID|GID|size|atime|mtime|ctime|crtime`.
    ///
    /// * DAR stores no content hash, inode address, or birth time, so the
    ///   `MD5`, `inode`, and `crtime` fields are `0`.
    /// * `mode` is written in TSK's `type/type+perms` form, e.g.
    ///   `r/rrwxr-xr-x`.
    /// * For a symlink, the target is appended to the name as ` -> target`.
    /// * `|`, `\`, and control bytes occurring in a name are backslash-escaped,
    ///   so one entry always stays on one line.
    fn bodyfile(&self) -> String;
}
60
61impl DarBodyfile for DarEntry {
62 fn bodyfile(&self) -> String {
63 bodyfile::line(self)
64 }
65}
66
67/// Forensic analysis over a parsed DAR catalogue: anomaly auditing and bodyfile
68/// export. Pure metadata over the already-parsed catalogue — no archive data is
69/// read or decoded.
70pub trait DarAudit {
71 /// Audit the loaded catalogue for forensic anomalies, returning them sorted
72 /// most-severe first. See [`AnomalyKind`] for what is detected; each
73 /// [`Anomaly`] is an observation, not a conclusion.
74 fn audit(&self) -> Vec<Anomaly>;
75
76 /// Write a Sleuth Kit [bodyfile](DarBodyfile::bodyfile) — one line per
77 /// catalogue entry, newline-terminated — to `out`, for feeding TSK's
78 /// `mactime` timeline tool. Pure metadata over the parsed catalogue; no
79 /// archive data is read.
80 ///
81 /// # Errors
82 /// Returns any I/O error produced while writing to `out`.
83 fn write_bodyfile<W: Write>(&self, out: &mut W) -> std::io::Result<()>;
84}
85
86impl<R: Read + Seek> DarAudit for DarReader<R> {
87 fn audit(&self) -> Vec<Anomaly> {
88 let mut anomalies = Vec::new();
89
90 if !self.is_complete() {
91 anomalies.push(Anomaly::new(AnomalyKind::IncompleteCatalog {
92 entries_recovered: self.entry_count(),
93 }));
94 }
95
96 let mut seen: std::collections::HashSet<Vec<u8>> = std::collections::HashSet::new();
97 let mut dup_seen: std::collections::HashSet<Vec<u8>> = std::collections::HashSet::new();
98 for e in self.iter_entries() {
99 let path = String::from_utf8_lossy(&e.path).into_owned();
100
101 if e.path.first() == Some(&b'/') {
102 anomalies.push(Anomaly::new(AnomalyKind::AbsolutePath {
103 path: path.clone(),
104 }));
105 }
106 if e.path.split(|&b| b == b'/').any(|c| c == b"..") {
107 anomalies.push(Anomaly::new(AnomalyKind::ParentTraversal {
108 path: path.clone(),
109 }));
110 }
111 if e.path.iter().any(|&b| b < 0x20 || b == 0x7f) {
112 anomalies.push(Anomaly::new(AnomalyKind::ControlCharsInName {
113 path: path.clone(),
114 }));
115 }
116 for (field, t) in [("atime", e.atime), ("mtime", e.mtime)]
117 .into_iter()
118 .chain(e.ctime.map(|c| ("ctime", c)))
119 {
120 if t > FAR_FUTURE_EPOCH_SECS {
121 anomalies.push(Anomaly::new(AnomalyKind::FutureTimestamp {
122 path: path.clone(),
123 field,
124 epoch_secs: t,
125 }));
126 }
127 }
128 // Report a duplicated path once, on its second sighting.
129 if !seen.insert(e.path.clone()) && dup_seen.insert(e.path.clone()) {
130 anomalies.push(Anomaly::new(AnomalyKind::DuplicatePath { path }));
131 }
132 }
133
134 // Most-severe first; stable, so equal severities keep catalogue order.
135 anomalies.sort_by_key(|a| std::cmp::Reverse(a.severity));
136 anomalies
137 }
138
139 fn write_bodyfile<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
140 for entry in self.iter_entries() {
141 writeln!(out, "{}", entry.bodyfile())?;
142 }
143 Ok(())
144 }
145}