sherlock_nsf_parser/detect.rs
1//! File-identification: tell an NSF from a non-NSF without committing to
2//! parsing the rest of the database.
3//!
4//! The NSF file header is 6 bytes at offset 0:
5//!
6//! ```text
//! offset  width  field
//! 0       2      LSIG signature, fixed `1A 00` (LE u16 = 0x001A)
//! 2       4      DB header size in bytes (LE u32), typically 256-4096
10//! ```
11//!
12//! There is no ASCII magic; the format was never designed with a
13//! human-readable identifier. NSF, NTF (template), NSG (web), and `.box`
14//! files (e.g. `mail.box` on a Domino server) all share this header --
15//! the file extension is the differentiator, not a magic byte. This
16//! module reports the structural family ("file looks like an NSF") and
17//! leaves extension classification to the caller.
18
19use crate::error::NsfError;
20
/// First byte of the two-byte LSIG file signature: `0x1A` in every valid NSF.
pub const NSF_LSIG_BYTE_0: u8 = 0x1A;
/// Second byte of the two-byte LSIG file signature: `0x00` in every valid NSF.
pub const NSF_LSIG_BYTE_1: u8 = 0x00;
/// Smallest DB-header-size value, in bytes, that is still accepted as
/// plausible (the bound is inclusive). The libnsfdb spec documents 256
/// bytes as the minimum and real Notes builds have never been observed
/// to use smaller; the accepted floor is nevertheless 64.
/// NOTE(review): presumably a deliberate safety margin below the documented
/// minimum, mirroring the generous upper bound -- confirm.
pub const MIN_PLAUSIBLE_DB_HEADER_SIZE: u32 = 64;
/// Largest DB-header-size value, in bytes, that is still accepted as
/// plausible (the bound is inclusive). Real Notes builds have never been
/// observed to exceed ~4 KB; 64 KB leaves a safety margin while still
/// rejecting obvious-garbage values.
pub const MAX_PLAUSIBLE_DB_HEADER_SIZE: u32 = 64 * 1024;
33
/// Outcome of sniffing a file's leading bytes.
///
/// [`FileKind::Nsf`] is produced only when the two-byte file signature
/// matches AND the declared DB-header-size falls inside the plausible
/// range. [`FileKind::NotNsf`] carries a short explanation that can be
/// shown directly in an operator-facing modal.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum FileKind {
    /// The header has the shape of a valid NSF / NTF / NSG. The payload
    /// comes straight from the 6-byte file header; nothing beyond it has
    /// been parsed yet.
    Nsf {
        /// Size in bytes of the database header, as declared by the file
        /// header. This is the DBINFO region that starts immediately
        /// after the 6-byte file header.
        db_header_size: u32,
    },
    /// The input is not an NSF, or is too short / structurally bogus to
    /// be one.
    NotNsf {
        /// Single-sentence, plain-English explanation of the rejection,
        /// suitable for surfacing in a UI.
        reason: String,
    },
}

impl FileKind {
    /// Reports whether this value is the [`FileKind::Nsf`] variant.
    pub fn is_nsf(&self) -> bool {
        // Exhaustive on purpose: adding a variant forces a decision here.
        match self {
            Self::Nsf { .. } => true,
            Self::NotNsf { .. } => false,
        }
    }
}
65
66/// Identify a file from its leading bytes.
67///
68/// Pass at least the first 6 bytes; passing more is fine and costs
69/// nothing (extra bytes are ignored at this stage).
70///
71/// Returns [`FileKind::NotNsf`] -- not an error -- when the file is
72/// recognizably not an NSF; an operator-facing tool wants to surface a
73/// reason, not bail. The hard-error path on [`NsfError`] is reserved for
74/// callers who specifically want the structured variant; most consumers
75/// should prefer this function.
76pub fn identify_file(bytes: &[u8]) -> FileKind {
77 match identify_file_strict(bytes) {
78 Ok(kind) => kind,
79 Err(e) => FileKind::NotNsf {
80 reason: e.to_string(),
81 },
82 }
83}
84
85/// Strict variant of [`identify_file`] that returns the structured error
86/// instead of folding it into a `NotNsf { reason }`. Useful for callers
87/// that want to programmatically distinguish "too short" from "wrong
88/// signature" from "implausible header size".
89pub fn identify_file_strict(bytes: &[u8]) -> Result<FileKind, NsfError> {
90 if bytes.len() < 6 {
91 return Err(NsfError::TooShort {
92 actual: bytes.len(),
93 required: 6,
94 });
95 }
96 if bytes[0] != NSF_LSIG_BYTE_0 || bytes[1] != NSF_LSIG_BYTE_1 {
97 return Err(NsfError::BadFileSignature {
98 observed: [bytes[0], bytes[1]],
99 });
100 }
101 let db_header_size = u32::from_le_bytes([bytes[2], bytes[3], bytes[4], bytes[5]]);
102 if db_header_size < MIN_PLAUSIBLE_DB_HEADER_SIZE
103 || db_header_size > MAX_PLAUSIBLE_DB_HEADER_SIZE
104 {
105 return Err(NsfError::BadHeaderSize {
106 size: db_header_size,
107 });
108 }
109 Ok(FileKind::Nsf { db_header_size })
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 16-byte buffer that starts with a valid LSIG signature
    /// followed by `db_header_size` encoded as a little-endian `u32`.
    /// The remaining bytes are zero padding.
    fn header_with_size(db_header_size: u32) -> Vec<u8> {
        let mut v = vec![0u8; 16];
        v[0] = NSF_LSIG_BYTE_0;
        v[1] = NSF_LSIG_BYTE_1;
        v[2..6].copy_from_slice(&db_header_size.to_le_bytes());
        v
    }

    /// Minimal valid-looking file header: LSIG `1A 00`, DB header size = 256.
    fn ok_header() -> Vec<u8> {
        header_with_size(256)
    }

    /// Unwraps the rejection reason, failing the test with a useful
    /// message if the input was unexpectedly accepted.
    fn rejection_reason(kind: FileKind) -> String {
        match kind {
            FileKind::NotNsf { reason } => reason,
            other => panic!("expected NotNsf, got {:?}", other),
        }
    }

    #[test]
    fn identifies_valid_nsf_header() {
        let h = ok_header();
        let kind = identify_file(&h);
        assert!(kind.is_nsf());
        assert_eq!(kind, FileKind::Nsf { db_header_size: 256 });
    }

    #[test]
    fn accepts_exactly_six_bytes() {
        // The header is 6 bytes; nothing after it is required.
        let h = ok_header();
        assert_eq!(
            identify_file(&h[..6]),
            FileKind::Nsf { db_header_size: 256 }
        );
    }

    #[test]
    fn rejects_too_short_file() {
        let kind = identify_file(&[0x1A, 0x00, 0x00]);
        assert!(!kind.is_nsf());
        assert!(rejection_reason(kind).contains("too short"));

        let err = identify_file_strict(&[0x1A, 0x00, 0x00]).unwrap_err();
        assert!(matches!(
            err,
            NsfError::TooShort {
                actual: 3,
                required: 6
            }
        ));
    }

    #[test]
    fn rejects_empty_input() {
        assert!(!identify_file(&[]).is_nsf());
        let err = identify_file_strict(&[]).unwrap_err();
        assert!(matches!(
            err,
            NsfError::TooShort {
                actual: 0,
                required: 6
            }
        ));
    }

    #[test]
    fn rejects_bad_signature() {
        let mut h = ok_header();
        h[0] = 0x21; // !BDN magic - common confusion with PST
        h[1] = 0x42;
        let kind = identify_file(&h);
        assert!(!kind.is_nsf());
        let reason = rejection_reason(kind);
        assert!(reason.contains("21 42"));
        assert!(reason.contains("1A 00"));
    }

    #[test]
    fn rejects_zero_header_size() {
        let h = header_with_size(0);
        let kind = identify_file(&h);
        assert!(!kind.is_nsf());
        assert!(rejection_reason(kind).contains("implausible"));
    }

    #[test]
    fn rejects_impossibly_large_header_size() {
        // 4 GB header? definitely not.
        let h = header_with_size(u32::MAX);
        let kind = identify_file(&h);
        assert!(!kind.is_nsf());

        // Pin the cause of the rejection, not just the fact of it.
        let err = identify_file_strict(&h).unwrap_err();
        assert!(matches!(err, NsfError::BadHeaderSize { size: u32::MAX }));
    }

    #[test]
    fn accepts_header_size_at_plausibility_bounds() {
        // Both bounds are inclusive.
        for &size in &[MIN_PLAUSIBLE_DB_HEADER_SIZE, MAX_PLAUSIBLE_DB_HEADER_SIZE] {
            assert_eq!(
                identify_file(&header_with_size(size)),
                FileKind::Nsf {
                    db_header_size: size
                }
            );
        }
    }

    #[test]
    fn rejects_header_size_just_outside_plausibility_bounds() {
        let just_outside = [
            MIN_PLAUSIBLE_DB_HEADER_SIZE - 1,
            MAX_PLAUSIBLE_DB_HEADER_SIZE + 1,
        ];
        for &size in &just_outside {
            let err = identify_file_strict(&header_with_size(size)).unwrap_err();
            match err {
                NsfError::BadHeaderSize { size: reported } => assert_eq!(reported, size),
                _ => panic!("expected BadHeaderSize for declared size {}", size),
            }
        }
    }

    #[test]
    fn strict_variant_returns_structured_error() {
        let mut h = ok_header();
        h[0] = 0xDE;
        h[1] = 0xAD;
        let err = identify_file_strict(&h).unwrap_err();
        assert!(matches!(
            err,
            NsfError::BadFileSignature {
                observed: [0xDE, 0xAD]
            }
        ));
    }

    #[test]
    fn accepts_extra_bytes_after_header() {
        let mut h = ok_header();
        h.extend_from_slice(&[0xAA; 100_000]);
        let kind = identify_file(&h);
        assert!(kind.is_nsf());
    }
}
212}