Skip to main content

sherlock_nsf_parser/
detect.rs

1//! File-identification: tell an NSF from a non-NSF without committing to
2//! parsing the rest of the database.
3//!
4//! The NSF file header is 6 bytes at offset 0:
5//!
6//! ```text
7//! offset  width  field
8//! 0       2      LSIG signature, fixed `1A 00` (LE u16 = 0x001A)
9//! 2       4      DB header size in bytes (LE u32), typically 256-4096
10//! ```
11//!
12//! There is no ASCII magic; the format was never designed with a
13//! human-readable identifier. NSF, NTF (template), NSG (web), and `.box`
14//! files (e.g. `mail.box` on a Domino server) all share this header --
15//! the file extension is the differentiator, not a magic byte. This
16//! module reports the structural family ("file looks like an NSF") and
17//! leaves extension classification to the caller.
18
19use crate::error::NsfError;
20
/// First byte of the two-byte LSIG file signature; a valid NSF starts with `0x1A`.
pub const NSF_LSIG_BYTE_0: u8 = 0x1A;
/// Second byte of the two-byte LSIG file signature; a valid NSF has `0x00` here.
pub const NSF_LSIG_BYTE_1: u8 = 0x00;
/// Smallest DB-header-size value (in bytes) that is still treated as
/// plausible. No real Notes build has been observed to declare less.
///
/// NOTE(review): the libnsfdb spec is cited as documenting a 256-byte
/// minimum, yet this bound is 64 -- confirm the looser value is intended.
pub const MIN_PLAUSIBLE_DB_HEADER_SIZE: u32 = 64;
/// Largest DB-header-size value (in bytes) that is still treated as
/// plausible. Observed Notes builds stay around 4 KB or below; the limit
/// is set at 64 KB to leave headroom while still filtering out values
/// that are clearly garbage.
pub const MAX_PLAUSIBLE_DB_HEADER_SIZE: u32 = 64 * 1024;
33
/// Outcome of inspecting a file's leading bytes.
///
/// The `Nsf` variant is produced only when both the file-header signature
/// matches and the declared DB-header size is plausible. The `NotNsf`
/// variant carries a brief explanation that can be shown to an operator
/// (for example in a modal dialog).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileKind {
    /// The file has the shape of an NSF / NTF / NSG. The payload comes
    /// straight from the 6-byte file header; nothing beyond that header
    /// has been parsed at this point.
    Nsf {
        /// Size in bytes of the database header, as declared by the file
        /// header. It describes the DBINFO region located directly after
        /// the 6-byte file header.
        db_header_size: u32,
    },
    /// The file is not an NSF, or it is too short / structurally bogus to
    /// be one.
    NotNsf {
        /// One-sentence, plain-English explanation of the rejection,
        /// suitable for display in a UI.
        reason: String,
    },
}

impl FileKind {
    /// Returns `true` for [`FileKind::Nsf`] and `false` for
    /// [`FileKind::NotNsf`].
    pub fn is_nsf(&self) -> bool {
        // Exhaustive on purpose: a new variant must decide explicitly
        // which side it belongs on.
        match self {
            Self::Nsf { .. } => true,
            Self::NotNsf { .. } => false,
        }
    }
}
65
66/// Identify a file from its leading bytes.
67///
68/// Pass at least the first 6 bytes; passing more is fine and costs
69/// nothing (extra bytes are ignored at this stage).
70///
71/// Returns [`FileKind::NotNsf`] -- not an error -- when the file is
72/// recognizably not an NSF; an operator-facing tool wants to surface a
73/// reason, not bail. The hard-error path on [`NsfError`] is reserved for
74/// callers who specifically want the structured variant; most consumers
75/// should prefer this function.
76pub fn identify_file(bytes: &[u8]) -> FileKind {
77    match identify_file_strict(bytes) {
78        Ok(kind) => kind,
79        Err(e) => FileKind::NotNsf {
80            reason: e.to_string(),
81        },
82    }
83}
84
85/// Strict variant of [`identify_file`] that returns the structured error
86/// instead of folding it into a `NotNsf { reason }`. Useful for callers
87/// that want to programmatically distinguish "too short" from "wrong
88/// signature" from "implausible header size".
89pub fn identify_file_strict(bytes: &[u8]) -> Result<FileKind, NsfError> {
90    if bytes.len() < 6 {
91        return Err(NsfError::TooShort {
92            actual: bytes.len(),
93            required: 6,
94        });
95    }
96    if bytes[0] != NSF_LSIG_BYTE_0 || bytes[1] != NSF_LSIG_BYTE_1 {
97        return Err(NsfError::BadFileSignature {
98            observed: [bytes[0], bytes[1]],
99        });
100    }
101    let db_header_size = u32::from_le_bytes([bytes[2], bytes[3], bytes[4], bytes[5]]);
102    if db_header_size < MIN_PLAUSIBLE_DB_HEADER_SIZE
103        || db_header_size > MAX_PLAUSIBLE_DB_HEADER_SIZE
104    {
105        return Err(NsfError::BadHeaderSize {
106            size: db_header_size,
107        });
108    }
109    Ok(FileKind::Nsf { db_header_size })
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a 16-byte buffer that starts with the LSIG signature followed
    /// by `db_header_size` encoded as a little-endian `u32`; the remaining
    /// bytes are zero.
    fn header_with_size(db_header_size: u32) -> Vec<u8> {
        let mut v = vec![0u8; 16];
        v[0] = NSF_LSIG_BYTE_0;
        v[1] = NSF_LSIG_BYTE_1;
        v[2..6].copy_from_slice(&db_header_size.to_le_bytes());
        v
    }

    /// Minimal valid-looking file header: LSIG `1A 00`, DB header size = 256.
    fn ok_header() -> Vec<u8> {
        header_with_size(256)
    }

    /// Extract the rejection reason, failing the test if the file was
    /// unexpectedly accepted.
    fn rejection_reason(kind: FileKind) -> String {
        match kind {
            FileKind::NotNsf { reason } => reason,
            FileKind::Nsf { db_header_size } => panic!(
                "expected NotNsf, got Nsf with db_header_size {}",
                db_header_size
            ),
        }
    }

    #[test]
    fn identifies_valid_nsf_header() {
        let kind = identify_file(&ok_header());
        assert!(kind.is_nsf());
        assert_eq!(kind, FileKind::Nsf { db_header_size: 256 });
    }

    #[test]
    fn rejects_too_short_file() {
        let kind = identify_file(&[0x1A, 0x00, 0x00]);
        assert!(!kind.is_nsf());
        assert!(rejection_reason(kind).contains("too short"));
    }

    #[test]
    fn rejects_bad_signature() {
        let mut h = ok_header();
        h[0] = 0x21; // !BDN magic - common confusion with PST
        h[1] = 0x42;
        let kind = identify_file(&h);
        assert!(!kind.is_nsf());
        let reason = rejection_reason(kind);
        assert!(reason.contains("21 42"));
        assert!(reason.contains("1A 00"));
    }

    #[test]
    fn rejects_zero_header_size() {
        let kind = identify_file(&header_with_size(0));
        assert!(!kind.is_nsf());
        assert!(rejection_reason(kind).contains("implausible"));
    }

    #[test]
    fn rejects_impossibly_large_header_size() {
        // 4 GB header? definitely not.
        let h = header_with_size(0xFFFF_FFFF);
        assert!(!identify_file(&h).is_nsf());
        // Pin the cause as well, so a rejection for some unrelated reason
        // cannot make this test pass by accident.
        assert!(matches!(
            identify_file_strict(&h),
            Err(NsfError::BadHeaderSize { size: 0xFFFF_FFFF })
        ));
    }

    #[test]
    fn accepts_header_sizes_at_both_bounds() {
        // Both limits are inclusive.
        for &size in &[MIN_PLAUSIBLE_DB_HEADER_SIZE, MAX_PLAUSIBLE_DB_HEADER_SIZE] {
            let kind = identify_file(&header_with_size(size));
            assert_eq!(kind, FileKind::Nsf { db_header_size: size });
        }
    }

    #[test]
    fn rejects_header_sizes_just_outside_bounds() {
        let outside = [
            MIN_PLAUSIBLE_DB_HEADER_SIZE - 1,
            MAX_PLAUSIBLE_DB_HEADER_SIZE + 1,
        ];
        for &bad in &outside {
            assert!(matches!(
                identify_file_strict(&header_with_size(bad)),
                Err(NsfError::BadHeaderSize { size }) if size == bad
            ));
        }
    }

    #[test]
    fn six_bytes_is_the_minimum_input() {
        let h = ok_header();
        // Exactly the 6-byte file header is enough.
        assert!(identify_file(&h[..6]).is_nsf());
        // One byte fewer is reported as too short, with the observed length.
        assert!(matches!(
            identify_file_strict(&h[..5]),
            Err(NsfError::TooShort {
                actual: 5,
                required: 6
            })
        ));
        // Empty input takes the same path instead of panicking.
        assert!(matches!(
            identify_file_strict(&[]),
            Err(NsfError::TooShort {
                actual: 0,
                required: 6
            })
        ));
    }

    #[test]
    fn strict_variant_returns_structured_error() {
        let mut h = ok_header();
        h[0] = 0xDE;
        h[1] = 0xAD;
        let err = identify_file_strict(&h).unwrap_err();
        assert!(matches!(
            err,
            NsfError::BadFileSignature { observed: [0xDE, 0xAD] }
        ));
    }

    #[test]
    fn accepts_extra_bytes_after_header() {
        let mut h = ok_header();
        h.extend_from_slice(&[0xAA; 100_000]);
        let kind = identify_file(&h);
        assert!(kind.is_nsf());
    }
}
212}