openqbw 0.1.1

QuickBooks .qbw file parser built on opensqlany page-store.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
//! Invoice line-item record parsing.
//!
//! The lineitem record is anchored by the 25-byte pattern
//! `00 00 00 00 10 <16 ASCII base62 chars> 00 00 80 3F` where the 16 chars
//! are the parent invoice's QB-ID and the trailing bytes are a `float32(1.0)`
//! quantity. Immediately after the anchor sit the typed amount bytes and a
//! date/counter pair.
//!
//! See `OpenQBW/re/NOTES.md §C.40` for the typed-amount byte semantics:
//!   - `0x01`, `0x02` -- `[type][u24 LE cents]`
//!   - `0x03`         -- deferred (raw bytes recorded but not converted)
//!   - `0x00`         -- non-amount marker (record skipped or carries
//!     date/counter only)

use std::iter::FusedIterator;

use opensqlany::{ApModel, PageStore, PageType, Result as SaResult};

use crate::bv_recovery::{deobfuscate_with_bv, recover_bv_qb_data};
use crate::page_attribution::PageAttribution;

pub use crate::date::DATE_EPOCH_DAYS_BEFORE_UNIX;

/// Exclusive end offset of the decoded-page region that is scanned for
/// anchors; bytes at or beyond this offset are never passed to `scan_page`.
const PAGE_DATA_END: usize = 0xFE0;
/// Total anchor length in bytes: 5-byte prefix + 16-byte QB-ID + 4-byte
/// `float32(1.0)`.
const ANCHOR_LEN: usize = 25;
/// Length in bytes of a QB-ID (ASCII base62 characters).
const QB_ID_LEN: usize = 16;
/// Little-endian IEEE-754 encoding of `1.0_f32`; the last four anchor bytes.
const F32_ONE: [u8; 4] = [0x00, 0x00, 0x80, 0x3F];
/// The five bytes that open an anchor, immediately before the parent QB-ID.
const PARENT_PREFIX: [u8; 5] = [0x00, 0x00, 0x00, 0x00, 0x10];

/// Errors from line-item parsing.
///
/// The single variant wraps an `opensqlany` error; `#[from]` generates the
/// `From<opensqlany::Error>` conversion so `?` can be used on such results.
#[derive(Debug, thiserror::Error)]
pub enum LineItemError {
    /// Wrapped `opensqlany` error. `transparent` forwards the inner error's
    /// `Display` output and `source()` unchanged.
    #[error(transparent)]
    Sa(#[from] opensqlany::Error),
}

/// Tag carried by the first of the four payload bytes that follow a
/// line-item anchor; it decides how the remaining three bytes are read.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AmountType {
    /// `0x00` — placeholder. The four bytes are not a cents value.
    None,
    /// `0x01` — `[0x01][u24 LE cents]` (provisional encoding).
    OneByteOne,
    /// `0x02` — `[0x02][u24 LE cents]`.
    Standard,
    /// `0x03` — deferred. Encoding not yet known; only `raw` is populated.
    Deferred,
    /// Any other byte — likely an anchor false-positive or unrecognised tag.
    Other(u8),
}

impl AmountType {
    /// Map a raw tag byte onto its variant. Bytes `0x00..=0x03` have
    /// dedicated variants; everything else is preserved in [`Self::Other`].
    pub fn from_byte(b: u8) -> Self {
        // Indexed by tag value; tags past the end of the table are unknown.
        const BY_TAG: [AmountType; 4] = [
            AmountType::None,
            AmountType::OneByteOne,
            AmountType::Standard,
            AmountType::Deferred,
        ];
        BY_TAG
            .get(usize::from(b))
            .copied()
            .unwrap_or(Self::Other(b))
    }

    /// True for the two tags (`0x01`, `0x02`) whose payload is a cents value.
    fn has_cents(self) -> bool {
        matches!(self, Self::OneByteOne | Self::Standard)
    }

    /// Read `raw[1..4]` as an UNSIGNED little-endian 24-bit cents value.
    ///
    /// This is the original Phase 5 transaction-volume interpretation; see
    /// [`AmountType::decode_cents_signed`] for the signed reading from C.48.
    /// `raw[0]` (the tag byte) is ignored; the variant in `self` decides.
    ///
    /// Returns `None` unless `self` is `OneByteOne` or `Standard`.
    pub fn decode_cents(self, raw: &[u8; 4]) -> Option<u32> {
        if !self.has_cents() {
            return None;
        }
        // Pad the three payload bytes with a zero high byte to form a u32.
        Some(u32::from_le_bytes([raw[1], raw[2], raw[3], 0]))
    }

    /// Read `raw[1..4]` as SIGNED cents under the C.48 Track A convention:
    /// bit 7 of `raw[1]` is the sign flag and is excluded from the
    /// magnitude, which is `(raw[1] & 0x7F) | (raw[2] << 8) | (raw[3] << 16)`.
    ///
    /// Returns `None` unless `self` is `OneByteOne` or `Standard` (type
    /// `0x03` is NOT a signed amount; its bytes store enumerated reference
    /// codes - see C.48).
    pub fn decode_cents_signed(self, raw: &[u8; 4]) -> Option<i32> {
        if !self.has_cents() {
            return None;
        }
        let negative = raw[1] & 0x80 != 0;
        let magnitude = i32::from_le_bytes([raw[1] & 0x7F, raw[2], raw[3], 0]);
        Some(if negative { -magnitude } else { magnitude })
    }
}

/// Parsed invoice line-item record.
///
/// One value is produced per anchor found on a page. Fields marked "when
/// discovered" come from heuristic searches around the anchor and are `None`
/// when the search finds nothing.
#[derive(Debug, Clone)]
pub struct LineItem {
    /// Parent invoice QB-ID (16 base62 characters).
    pub invoice_id: String,
    /// Source page number in the QBW file.
    pub page_number: u64,
    /// Byte offset within the decoded page body where the anchor starts.
    pub page_offset: usize,
    /// Item QB-ID — the line item's own identifier, when discovered. It is
    /// looked for behind a `04 00 10` marker in the 96 bytes before the anchor.
    pub item_qb_id: Option<String>,
    /// Amount-type classification.
    pub amount_type: AmountType,
    /// Decoded UNSIGNED cents value when [`AmountType::decode_cents`]
    /// applies. Preserves Phase 5 transaction-volume semantics.
    pub amount_cents: Option<u32>,
    /// Decoded SIGNED cents value (high-bit-of-byte-1-is-sign) when
    /// [`AmountType::decode_cents_signed`] applies. C.48 Track A.
    pub amount_cents_signed: Option<i32>,
    /// Raw 4 bytes immediately following the float32(1.0) quantity. All
    /// zeroes when fewer than four bytes remain after the anchor.
    pub amount_raw: [u8; 4],
    /// SA17 transaction date stored as days since 1981-01-01, when discovered.
    /// Only raw values in `13_000..20_000` are accepted by the parser.
    pub txn_date_raw: Option<u32>,
    /// SA17 transaction counter, when discovered. Always set together with
    /// `txn_date_raw`; accepted values lie in `1..1_000_000`.
    pub counter: Option<u32>,
    /// Owning `SYSTABLE` name (e.g. `abmc_invoice_inventory_lineitem`) when
    /// the line item's page was attributable. Populated by
    /// [`iter_lineitems_with_attribution`]; `None` otherwise.
    pub source_table: Option<String>,
}

impl LineItem {
    /// Express the SA17 transaction date as days since the Unix epoch
    /// (1970-01-01) by subtracting [`DATE_EPOCH_DAYS_BEFORE_UNIX`] from the
    /// raw day count.
    ///
    /// Returns `None` when no raw date was found, or when the raw value is
    /// implausible: zero, or 30,000 days or more past the SA epoch.
    // NOTE(review): the sign convention of `DATE_EPOCH_DAYS_BEFORE_UNIX`
    // is defined in `crate::date` and is not visible here; confirm it agrees
    // with the epoch documented on `LineItem::txn_date_raw`.
    pub fn txn_date_days_since_unix(&self) -> Option<i64> {
        match self.txn_date_raw {
            Some(raw_days) if raw_days > 0 && raw_days < 30_000 => {
                Some(raw_days as i64 - DATE_EPOCH_DAYS_BEFORE_UNIX)
            }
            _ => None,
        }
    }
}

/// Yields every line item found in `store` by scanning each `E`-type
/// (`PageType::Extent`) page, starting at page 1 (page 0 is never scanned).
///
/// Each page is deobfuscated with the QB-specific `bv` recovery when that
/// succeeds; otherwise `model` is used as the fallback, so a page is not
/// skipped merely because its `bv` could not be recovered. Each yielded
/// item has `source_table = None`.
pub fn iter_lineitems<'a>(
    store: &'a PageStore,
    model: &'a ApModel,
) -> impl Iterator<Item = LineItem> + 'a {
    LineItemIter::new(store, model, None)
}

/// Like [`iter_lineitems`], but tags each emitted item with the
/// `SysTableEntry::name` of the table that the line item's source page
/// is attributed to (via [`PageAttribution`]).
///
/// Items from a page for which `attribution` has no entry keep
/// `source_table = None`.
pub fn iter_lineitems_with_attribution<'a>(
    store: &'a PageStore,
    model: &'a ApModel,
    attribution: &'a PageAttribution,
) -> impl Iterator<Item = LineItem> + 'a {
    LineItemIter::new(store, model, Some(attribution))
}

/// Iterator state behind [`iter_lineitems`] and
/// [`iter_lineitems_with_attribution`]: walks the store page by page and
/// hands out the items of one page at a time.
struct LineItemIter<'a> {
    /// Page source being scanned.
    store: &'a PageStore,
    /// Fallback deobfuscation model, used when QB-specific `bv` recovery fails.
    model: &'a ApModel,
    /// Optional page-to-table lookup used to fill `LineItem::source_table`.
    attribution: Option<&'a PageAttribution>,
    /// Number of the next page to examine.
    pn: u64,
    /// Page count of `store`, captured at construction; scanning stops here.
    n_pages: u64,
    /// Items parsed but not yet yielded. Consumed from the back with `pop`.
    buffer: Vec<LineItem>,
}

impl<'a> LineItemIter<'a> {
    /// Create an iterator positioned on page 1 with nothing buffered.
    /// `attribution` is `None` when items should not be tagged with a table.
    fn new(
        store: &'a PageStore,
        model: &'a ApModel,
        attribution: Option<&'a PageAttribution>,
    ) -> Self {
        let n_pages = store.page_count();
        Self {
            store,
            model,
            attribution,
            pn: 1, // skip superblock (page 0)
            n_pages,
            buffer: Vec::new(),
        }
    }

    /// Advance through pages until one of them contributes at least one
    /// line item or the store is exhausted.
    ///
    /// Returns `Ok(true)` when `buffer` holds items, `Ok(false)` when every
    /// page has been visited without finding more, and `Err` when loading a
    /// page fails. The page cursor is advanced before the fallible load, so
    /// a later call resumes with the page after the one that failed.
    fn fill_buffer(&mut self) -> SaResult<bool> {
        while self.buffer.is_empty() && self.pn < self.n_pages {
            let page_no = self.pn;
            self.pn += 1;

            let page = self.store.page(page_no)?;
            if page.trailer().page_type() != PageType::Extent {
                continue;
            }

            let raw = page.bytes();
            // QB-specific anchor-based bv recovery (C.36) takes priority;
            // the generic AP model is the fallback when no anchor is found.
            let plain = if let Some(bv) = recover_bv_qb_data(page_no, raw) {
                deobfuscate_with_bv(raw, page_no, bv)
            } else {
                self.model.deobfuscate_with_store(raw, page_no, self.store)
            };

            let first_new = self.buffer.len();
            scan_page(&plain[..PAGE_DATA_END], page_no, &mut self.buffer);

            // Stamp the items just added with the owning table, if known.
            let owner = self.attribution.and_then(|attr| attr.attribute(page_no));
            if let Some(entry) = owner {
                for item in &mut self.buffer[first_new..] {
                    item.source_table = Some(entry.name.clone());
                }
            }
        }
        Ok(!self.buffer.is_empty())
    }
}

impl Iterator for LineItemIter<'_> {
    type Item = LineItem;

    /// Yield the next buffered line item, refilling the buffer from
    /// subsequent pages as needed.
    ///
    /// Errors from individual pages are swallowed: a corrupted page should
    /// not abort an entire export, so a page that fails to load is skipped
    /// and scanning continues with the next one. A future API may expose
    /// per-page errors. `None` is returned only once every page has been
    /// visited and the buffer is empty.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if let Some(item) = self.buffer.pop() {
                return Some(item);
            }
            match self.fill_buffer() {
                // Buffer refilled; go round again and pop from it.
                Ok(true) => {}
                // All pages visited and nothing pending: iteration is over.
                Ok(false) => return None,
                // The page could not be loaded. `fill_buffer` advances its
                // page cursor before the fallible read, so retrying moves on
                // to the following page and the loop is guaranteed to finish.
                Err(_) => {}
            }
        }
    }
}

// Sound because `next` returns `None` only after the page cursor has reached
// the end of the store with an empty buffer, a state that never changes.
impl FusedIterator for LineItemIter<'_> {}

/// Scan a decoded page body for line-item anchors and append one parsed
/// record per anchor to `out`.
///
/// An anchor is `PARENT_PREFIX`, then `QB_ID_LEN` base62 bytes, then
/// `F32_ONE`. After a match the scan resumes past the whole anchor, so
/// matches never overlap.
///
/// Records are appended in REVERSE page order (highest offset first): the
/// iterator drains its buffer with `pop`, which then yields them in
/// ascending offset order. Bodies shorter than one anchor add nothing.
fn scan_page(body: &[u8], pn: u64, out: &mut Vec<LineItem>) {
    // Highest offset at which a complete anchor still fits.
    let last_start = match body.len().checked_sub(ANCHOR_LEN) {
        Some(n) => n,
        None => return,
    };

    let mut anchors = Vec::new();
    let mut cursor = 0usize;
    while cursor <= last_start {
        let candidate = &body[cursor..cursor + ANCHOR_LEN];
        // Cheapest check first: most offsets fail on the 5-byte prefix.
        let is_anchor = candidate.starts_with(&PARENT_PREFIX)
            && is_base62(&candidate[5..5 + QB_ID_LEN])
            && candidate.ends_with(&F32_ONE);
        if is_anchor {
            anchors.push(cursor);
            cursor += ANCHOR_LEN;
        } else {
            cursor += 1;
        }
    }

    // Highest offset first, so that popping from `out` restores page order.
    out.extend(
        anchors
            .into_iter()
            .rev()
            .map(|anchor| parse_anchor(body, pn, anchor)),
    );
}

/// Build a [`LineItem`] from the anchor that starts at `anchor_start`.
///
/// `body` is the decoded page body and `pn` the page number recorded on the
/// item. The caller must already have verified that a complete anchor
/// (prefix, base62 QB-ID, `float32(1.0)`) lies at `anchor_start`.
///
/// Three pieces of data are gathered around the anchor:
/// - the four amount bytes right after it (left as zeroes when fewer than
///   four bytes remain), classified and decoded via [`AmountType`];
/// - the first plausible date/counter pair at byte offsets 4..64 after the
///   anchor, including a pair that ends exactly at the end of `body`;
/// - the item QB-ID: the first `04 00 10` marker in the 96 bytes before the
///   anchor that is followed by 16 base62 bytes. Markers followed by
///   anything else are skipped rather than ending the search.
///
/// `source_table` is always `None` here.
///
/// # Panics
///
/// Panics if the 16 ID bytes after `anchor_start + 5` are out of bounds or
/// not valid UTF-8, which cannot happen for an anchor validated by the caller.
fn parse_anchor(body: &[u8], pn: u64, anchor_start: usize) -> LineItem {
    // Marker that precedes an item QB-ID.
    const ITEM_ID_PREFIX: [u8; 3] = [0x04, 0x00, 0x10];

    let id_start = anchor_start + 5;
    let invoice_id = std::str::from_utf8(&body[id_start..id_start + QB_ID_LEN])
        .expect("anchor guarded by is_base62")
        .to_owned();

    let payload_start = anchor_start + ANCHOR_LEN;
    let mut amount_raw = [0u8; 4];
    if let Some(bytes) = body.get(payload_start..payload_start + 4) {
        amount_raw.copy_from_slice(bytes);
    }
    let amount_type = AmountType::from_byte(amount_raw[0]);
    let amount_cents = amount_type.decode_cents(&amount_raw);
    let amount_cents_signed = amount_type.decode_cents_signed(&amount_raw);

    // Forward search for date+counter pair: SA date as u32 LE in 13000..20000,
    // followed by 4-byte counter > 0.
    let mut txn_date_raw = None;
    let mut counter = None;
    // Each probe reads 8 bytes, so the last usable offset is `len - 8`; the
    // exclusive bound is therefore `len - 7`.
    let end = (payload_start + 64).min(body.len().saturating_sub(7));
    for off in (payload_start + 4)..end {
        let d = u32::from_le_bytes([body[off], body[off + 1], body[off + 2], body[off + 3]]);
        let c = u32::from_le_bytes([body[off + 4], body[off + 5], body[off + 6], body[off + 7]]);
        if (13_000..20_000).contains(&d) && c > 0 && c < 1_000_000 {
            txn_date_raw = Some(d);
            counter = Some(c);
            break;
        }
    }

    // Backward search for item QB-ID prefixed by `04 00 10`. The three marker
    // bytes can also occur by chance in unrelated data, so a marker that is
    // not followed by a base62 ID is skipped and the search continues.
    let mut item_qb_id = None;
    let back_start = anchor_start.saturating_sub(96);
    let back_slice = &body[back_start..anchor_start];
    let mut search_from = 0usize;
    while let Some(rel) = find_subslice(&back_slice[search_from..], &ITEM_ID_PREFIX) {
        let marker = search_from + rel;
        let id_off = marker + ITEM_ID_PREFIX.len();
        match back_slice.get(id_off..id_off + QB_ID_LEN) {
            Some(id_bytes) if is_base62(id_bytes) => {
                let id = std::str::from_utf8(id_bytes).expect("base62 bytes are ASCII");
                item_qb_id = Some(id.to_owned());
                break;
            }
            // Not an ID after this marker; resume one byte further on.
            Some(_) => search_from = marker + 1,
            // No room left for a full ID; later markers have even less.
            None => break,
        }
    }

    LineItem {
        invoice_id,
        page_number: pn,
        page_offset: anchor_start,
        item_qb_id,
        amount_type,
        amount_cents,
        amount_cents_signed,
        amount_raw,
        txn_date_raw,
        counter,
        source_table: None,
    }
}

/// Report whether every byte of `s` is an ASCII digit or ASCII letter,
/// i.e. one of the 62 characters `0-9`, `A-Z`, `a-z`.
///
/// An empty slice is vacuously accepted.
fn is_base62(s: &[u8]) -> bool {
    s.iter().all(u8::is_ascii_alphanumeric)
}

/// Locate the first occurrence of `needle` inside `haystack`.
///
/// Returns the start index of the match, or `None` when there is no match,
/// when `needle` is empty, or when `needle` is longer than `haystack`.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // `windows` panics on a zero width, and an empty needle never matches.
    if needle.is_empty() {
        return None;
    }
    // A haystack shorter than the needle yields no windows, hence `None`.
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}

// Unit tests for amount decoding, the base62 check, and anchor scanning on a
// synthetic page body.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn amount_type_classification() {
        assert_eq!(AmountType::from_byte(0x00), AmountType::None);
        assert_eq!(AmountType::from_byte(0x01), AmountType::OneByteOne);
        assert_eq!(AmountType::from_byte(0x02), AmountType::Standard);
        assert_eq!(AmountType::from_byte(0x03), AmountType::Deferred);
        assert_eq!(AmountType::from_byte(0x42), AmountType::Other(0x42));
    }

    #[test]
    fn cents_decoding_known_record() {
        // Violette line item 1: 02 40 0f 04 → cents 0x040f40 = 266,048 → $2,660.48
        let raw = [0x02, 0x40, 0x0F, 0x04];
        let t = AmountType::from_byte(raw[0]);
        assert_eq!(t.decode_cents(&raw), Some(266_048));
    }

    #[test]
    fn type_03_no_cents() {
        // Type 0x03 must not decode under either interpretation.
        let raw = [0x03, 0xBF, 0x5F, 0x63];
        let t = AmountType::from_byte(raw[0]);
        assert_eq!(t.decode_cents(&raw), None);
        assert_eq!(t.decode_cents_signed(&raw), None);
    }

    #[test]
    fn signed_decode_positive() {
        // C.48 example: 0x02 with high bit clear -> positive.
        // raw = 02 40 2d 01 -> magnitude (0x40 & 0x7F)|(0x2D<<8)|(0x01<<16) = 0x012D40 = 77120
        let raw = [0x02, 0x40, 0x2D, 0x01];
        let t = AmountType::from_byte(raw[0]);
        assert_eq!(t.decode_cents_signed(&raw), Some(77_120));
    }

    #[test]
    fn signed_decode_negative_pairs_with_positive() {
        // C.48 example: paired 2-line journal lines must sum to zero.
        // line1: 02 40 2d 01 -> +77120
        // line2: 02 c0 2d 01 -> -77120
        let pos = [0x02, 0x40, 0x2D, 0x01];
        let neg = [0x02, 0xC0, 0x2D, 0x01];
        let t = AmountType::from_byte(pos[0]);
        let a = t.decode_cents_signed(&pos).unwrap();
        let b = t.decode_cents_signed(&neg).unwrap();
        assert_eq!(a + b, 0);
        assert_eq!(a, 77_120);
        assert_eq!(b, -77_120);
    }

    #[test]
    fn signed_decode_large_magnitude() {
        // Verify magnitude reaches full 23-bit range.
        // 02 7F FF FF -> +(0x7F | 0xFF00 | 0xFF0000) = 0xFFFF7F = 16_777_087
        let raw = [0x02, 0x7F, 0xFF, 0xFF];
        let t = AmountType::from_byte(raw[0]);
        assert_eq!(t.decode_cents_signed(&raw), Some(16_777_087));
    }

    #[test]
    fn base62_check() {
        assert!(is_base62(b"0000000000001QBm"));
        assert!(!is_base62(b"0000000000001QB!"));
        assert!(!is_base62(b"0000000000001 QB"));
    }

    #[test]
    fn scan_page_finds_synthetic_anchor() {
        // Craft a body containing one valid anchor at offset 32.
        // The 0xAA filler is neither a prefix byte nor base62, so no other
        // offset can match.
        let mut body = vec![0xAAu8; PAGE_DATA_END];
        let anchor_offset = 32;
        // 5-byte parent prefix
        body[anchor_offset..anchor_offset + 5].copy_from_slice(&PARENT_PREFIX);
        // QB-ID
        body[anchor_offset + 5..anchor_offset + 5 + QB_ID_LEN].copy_from_slice(b"0000000000001QBm");
        // float32(1.0)
        body[anchor_offset + 5 + QB_ID_LEN..anchor_offset + ANCHOR_LEN].copy_from_slice(&F32_ONE);
        // amount bytes: type 0x02, cents 0x040F40 = 266048
        body[anchor_offset + ANCHOR_LEN..anchor_offset + ANCHOR_LEN + 4]
            .copy_from_slice(&[0x02, 0x40, 0x0F, 0x04]);
        // date u32 LE 15511, counter u32 LE 12671 at offset payload+8
        body[anchor_offset + ANCHOR_LEN + 8..anchor_offset + ANCHOR_LEN + 12]
            .copy_from_slice(&15511u32.to_le_bytes());
        body[anchor_offset + ANCHOR_LEN + 12..anchor_offset + ANCHOR_LEN + 16]
            .copy_from_slice(&12671u32.to_le_bytes());

        let mut out = Vec::new();
        scan_page(&body, 3679, &mut out);
        assert_eq!(out.len(), 1);
        let li = &out[0];
        assert_eq!(li.invoice_id, "0000000000001QBm");
        assert_eq!(li.amount_type, AmountType::Standard);
        assert_eq!(li.amount_cents, Some(266_048));
        assert_eq!(li.txn_date_raw, Some(15511));
        assert_eq!(li.counter, Some(12671));
    }
}