use std::collections::BTreeMap;
use std::iter::FusedIterator;
use opensqlany::{ApModel, Page, PageStore, PageType, Result as SaResult, SlottedPage};
use crate::bv_recovery::{deobfuscate_with_bv, oracle_bv_e_page, recover_bv_qb_data};
/// Eight-byte marker that `parse_rows_in_body` searches for inside a row body.
/// Each match anchors one candidate `SysColumn` row: the name is looked up in
/// the bytes before the marker and the fixed-width fields in the bytes after it.
pub const SYSCOLUMN_TAG: [u8; 8] = [0x01, 0x52, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00];
/// Shortest name length, in bytes, that `find_name_before` will try.
const NAME_LEN_MIN: usize = 1;
/// Longest name length, in bytes, that `find_name_before` will try.
const NAME_LEN_MAX: usize = 40;
/// Byte counts that `find_name_before` skips backwards from the tag, tried in
/// this order, before probing for a length-prefixed name.
// NOTE(review): presumably the skipped bytes cover optional data stored between
// the name and the tag (the tests' `synth_row` places a length-prefixed default
// string there) — confirm against real pages.
const DEFAULT_PEELS: [usize; 7] = [0, 1, 2, 3, 4, 8, 16];
/// One column row recovered by `parse_rows_in_body` from the bytes around a
/// `SYSCOLUMN_TAG` match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SysColumn {
/// Identifier recovered by `find_name_before` from the bytes preceding the tag.
pub name: String,
/// Little-endian `u32` read from bytes 4..8 after the tag.
pub owner_object_id: u32,
/// Little-endian `u32` read from bytes 8..12 after the tag.
pub column_id: u32,
/// Byte 12 after the tag, stored as found (not validated by the parser).
pub nulls_flag: u8,
/// Byte 15 after the tag; rows are only accepted when it is ASCII alphabetic.
pub domain_char: u8,
/// Byte 16 after the tag, stored as found.
pub width: u8,
/// Page number that was passed to the parser for the page containing this row.
pub page_number: u64,
/// Offset of the tag within the row body slice that was scanned.
pub tag_offset: usize,
}
/// Looks backwards from `tag_pos` for a length-prefixed identifier.
///
/// For every entry of `DEFAULT_PEELS` (in order) the candidate name is assumed
/// to end `peel` bytes before `tag_pos`. For every length in
/// `NAME_LEN_MIN..=NAME_LEN_MAX` (ascending) the byte just before the candidate
/// must equal that length, every candidate byte must be ASCII alphanumeric or
/// `_`, and the first byte must not be a digit. The first candidate that
/// satisfies all of this is returned; `None` if there is none.
fn find_name_before(body: &[u8], tag_pos: usize) -> Option<String> {
    fn is_ident_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_'
    }
    fn is_ident_start(b: u8) -> bool {
        b.is_ascii_alphabetic() || b == b'_'
    }

    DEFAULT_PEELS
        .iter()
        // `name_end` is the exclusive end of the candidate name.
        .filter_map(|&peel| tag_pos.checked_sub(peel))
        // Need room for at least a length byte plus the shortest name.
        .filter(|&name_end| name_end > NAME_LEN_MIN)
        .find_map(|name_end| {
            (NAME_LEN_MIN..=NAME_LEN_MAX)
                // Lengths ascend, so once one no longer fits none will.
                .take_while(|&len| len < name_end)
                .find_map(|len| {
                    let prefix_at = name_end - len - 1;
                    if usize::from(body[prefix_at]) != len {
                        return None;
                    }
                    let candidate = &body[prefix_at + 1..name_end];
                    let well_formed = candidate.iter().copied().all(is_ident_byte)
                        && is_ident_start(candidate[0]);
                    well_formed
                        .then(|| candidate.iter().copied().map(char::from).collect::<String>())
                })
        })
}
/// Scans `body` for `SYSCOLUMN_TAG` occurrences and appends every row that
/// passes validation to `out` (existing entries in `out` are left untouched).
///
/// A row is accepted when the tag is followed by a complete 17-byte tail, the
/// tail's marker byte is `0x01`, its domain byte is ASCII alphabetic, and
/// `find_name_before` recovers a name from the bytes preceding the tag.
/// `pn` is recorded verbatim as the row's `page_number`.
fn parse_rows_in_body(body: &[u8], pn: u64, out: &mut Vec<SysColumn>) {
    // Fixed-size tail after the tag:
    //   [0..4)   skipped (the unit tests place a row id here)
    //   [4..8)   owner object id, little-endian u32
    //   [8..12)  column id, little-endian u32
    //   [12]     nulls flag
    //   [13]     not inspected
    //   [14]     marker, must be 0x01
    //   [15]     domain character
    //   [16]     width
    const TAIL_LEN: usize = 17;
    let tag_len = SYSCOLUMN_TAG.len();

    let mut i = 0usize;
    // `get` yields `None` as soon as a full tag + tail no longer fits, so every
    // index into `tail` below is in bounds without further checks.
    while let Some(window) = body.get(i..i + tag_len + TAIL_LEN) {
        if !window.starts_with(&SYSCOLUMN_TAG) {
            i += 1;
            continue;
        }
        // A tag hit always consumes the whole tag, accepted or not.
        let tag_offset = i;
        i += tag_len;

        let tail = &window[tag_len..];
        let (marker, domain_char, width) = (tail[14], tail[15], tail[16]);
        // Cheap byte checks first; the name lookup allocates.
        if marker != 0x01 || !domain_char.is_ascii_alphabetic() {
            continue;
        }
        let Some(name) = find_name_before(body, tag_offset) else {
            continue;
        };

        out.push(SysColumn {
            name,
            owner_object_id: u32::from_le_bytes([tail[4], tail[5], tail[6], tail[7]]),
            column_id: u32::from_le_bytes([tail[8], tail[9], tail[10], tail[11]]),
            nulls_flag: tail[12],
            domain_char,
            width,
            page_number: pn,
            tag_offset,
        });
    }
}
/// Parses `plain` as a slotted page and appends every `SysColumn` row found in
/// its row bodies to `out`.
///
/// Pages whose parsed `directory` is `None` contribute nothing. `pn` is used
/// both to build the `Page` and as the `page_number` of the recovered rows.
pub fn scan_page(plain: &[u8], pn: u64, out: &mut Vec<SysColumn>) {
    let slotted = SlottedPage::parse(Page::from_bytes(pn, plain));
    if slotted.directory.is_some() {
        for (_, row) in slotted.row_bytes() {
            parse_rows_in_body(row, pn, out);
        }
    }
}
/// Returns a lazy iterator over every `SysColumn` row recovered from `store`.
///
/// This is a thin wrapper around `SysColumnIter::new`. Rows come out in page
/// order and duplicates are not removed (see `collect_unique` for that).
/// The iterator's `next` maps a page-read error to `None`, so a failing page
/// ends the walk without reporting the error to the caller.
pub fn iter_syscolumns<'a>(
store: &'a PageStore,
model: &'a ApModel,
) -> impl Iterator<Item = SysColumn> + 'a {
SysColumnIter::new(store, model)
}
/// Collects all recovered columns, keeping only the first row seen for each
/// `(owner_object_id, column_id, name)` triple.
///
/// The result is ordered by that triple, because the rows are gathered in a
/// `BTreeMap` keyed on it.
pub fn collect_unique(store: &PageStore, model: &ApModel) -> Vec<SysColumn> {
    let mut first_seen: BTreeMap<(u32, u32, String), SysColumn> = BTreeMap::new();
    iter_syscolumns(store, model).for_each(|col| {
        let key = (col.owner_object_id, col.column_id, col.name.clone());
        // `or_insert` leaves an existing entry alone, so the earliest row wins.
        first_seen.entry(key).or_insert(col);
    });
    first_seen.into_values().collect()
}
/// Returns the columns of the table called `table_name`, ordered by
/// `column_id`, with repeated `(column_id, name)` pairs removed.
///
/// The owner id is resolved by asking `crate::sysobject::bridge_owners_to_tables`
/// for an owner-to-table-name mapping and picking the first entry whose name
/// equals `table_name`. An empty vector is returned when no entry matches.
pub fn schema_for(store: &PageStore, model: &ApModel, table_name: &str) -> Vec<SysColumn> {
    let columns: Vec<SysColumn> = iter_syscolumns(store, model).collect();
    let tables = crate::iter_systable_entries(store, model).collect::<Vec<_>>();
    let bridge = crate::sysobject::bridge_owners_to_tables(store, model, &columns, &tables);
    let Some((&owner, _)) = bridge.iter().find(|(_, n)| n.as_str() == table_name) else {
        return Vec::new();
    };
    let mut cols: Vec<SysColumn> = columns
        .into_iter()
        .filter(|c| c.owner_object_id == owner)
        .collect();
    // Stable sort: rows sharing a `column_id` stay in scan order.
    cols.sort_by_key(|c| c.column_id);
    // The sort key is `column_id` only, so copies of the same (column_id, name)
    // pair can be separated by a row with the same id but another name.
    // `dedup_by` only compares neighbours and would miss those; track every
    // pair seen so far instead. `insert` returns `false` for a repeat, which
    // makes `retain` drop it while keeping the first occurrence.
    let mut seen = std::collections::BTreeSet::new();
    cols.retain(|c| seen.insert((c.column_id, c.name.clone())));
    cols
}
/// Page-walking iterator state behind `iter_syscolumns`.
struct SysColumnIter<'a> {
/// Store the pages are read from.
store: &'a PageStore,
/// Model used as the last-resort deobfuscation path in `fill_buffer`.
model: &'a ApModel,
/// Number of the next page to examine; starts at 1.
pn: u64,
/// Value of `store.page_count()` captured at construction; the walk stops
/// once `pn` reaches it.
n_pages: u64,
/// Rows found on the most recently scanned page, stored in reverse so that
/// `pop` hands them out in their original order.
buffer: Vec<SysColumn>,
}
impl<'a> SysColumnIter<'a> {
    /// Creates an iterator positioned at page 1 with the page count captured
    /// from `store` up front.
    fn new(store: &'a PageStore, model: &'a ApModel) -> Self {
        let n_pages = store.page_count();
        Self {
            store,
            model,
            pn: 1,
            n_pages,
            buffer: Vec::new(),
        }
    }

    /// Advances through pages until one yields rows or the pages run out.
    ///
    /// Returns `Ok(true)` when `buffer` holds at least one row afterwards and
    /// `Ok(false)` when every remaining page has been consumed. A failing
    /// `store.page` call is propagated as `Err`; `pn` has already moved past
    /// the failing page at that point.
    fn fill_buffer(&mut self) -> SaResult<bool> {
        loop {
            if !self.buffer.is_empty() || self.pn >= self.n_pages {
                break;
            }
            let current = self.pn;
            self.pn += 1;

            let page = self.store.page(current)?;
            // Only pages whose trailer reports `PageType::Extent` are scanned.
            if page.trailer().page_type() != PageType::Extent {
                continue;
            }

            let raw = page.bytes();
            let plain = match recover_bv_qb_data(current, raw) {
                Some(bv) => deobfuscate_with_bv(raw, current, bv),
                None => {
                    let bv = oracle_bv_e_page(current, raw);
                    let guess = deobfuscate_with_bv(raw, current, bv);
                    // NOTE(review): a zero first byte is treated as "the oracle
                    // guess worked" — the reason is not visible in this file.
                    if guess[0] == 0 {
                        guess
                    } else {
                        self.model.deobfuscate_with_store(raw, current, self.store)
                    }
                }
            };

            let mut rows = Vec::new();
            scan_page(&plain, current, &mut rows);
            // Stored back to front so `pop` in `next` returns rows in scan order.
            self.buffer.extend(rows.into_iter().rev());
        }
        Ok(!self.buffer.is_empty())
    }
}
impl Iterator for SysColumnIter<'_> {
    type Item = SysColumn;

    /// Returns the next recovered row, refilling the buffer from further
    /// pages when it runs dry.
    ///
    /// A page-read error ends the iteration: the error itself is discarded and
    /// `None` is returned from then on.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if let Some(col) = self.buffer.pop() {
                return Some(col);
            }
            match self.fill_buffer() {
                Ok(true) => continue,
                Ok(false) => return None,
                Err(_) => {
                    // `fill_buffer` has already stepped `pn` past the failing
                    // page, so a later call would resume with the next page
                    // and could yield `Some` after this `None`. Jump to the
                    // end so the iterator stays exhausted, as the
                    // `FusedIterator` impl below promises.
                    self.pn = self.n_pages;
                    return None;
                }
            }
        }
    }
}

// Sound because `next` can only return `None` once `pn` has reached `n_pages`
// with an empty buffer, and nothing moves `pn` backwards or refills the buffer
// after that.
impl FusedIterator for SysColumnIter<'_> {}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the bytes of one synthetic row: length-prefixed name, optional
    /// length-prefixed default, the tag, three little-endian words (row id,
    /// owner, column id), then nulls flag, a zero byte, the `0x01` marker,
    /// the domain byte and the width.
    // Mirrors the row's field list one-to-one, hence the long signature.
    #[allow(clippy::too_many_arguments)]
    fn synth_row(
        name: &str,
        default: Option<&str>,
        row_id: u32,
        owner: u32,
        col_id: u32,
        nulls: u8,
        domain: u8,
        width: u8,
    ) -> Vec<u8> {
        let mut row = vec![name.len() as u8];
        row.extend_from_slice(name.as_bytes());
        if let Some(text) = default {
            row.push(text.len() as u8);
            row.extend_from_slice(text.as_bytes());
        }
        row.extend_from_slice(&SYSCOLUMN_TAG);
        for word in [row_id, owner, col_id] {
            row.extend_from_slice(&word.to_le_bytes());
        }
        row.extend_from_slice(&[nulls, 0x00, 0x01, domain, width]);
        row
    }

    /// Runs `parse_rows_in_body` over `body` and returns what it collected.
    fn parse(body: &[u8], pn: u64) -> Vec<SysColumn> {
        let mut found = Vec::new();
        parse_rows_in_body(body, pn, &mut found);
        found
    }

    #[test]
    fn parses_single_row_without_default() {
        let body = synth_row("account_id", None, 0x80000001, 3680, 1, 2, b'N', 4);
        let rows = parse(&body, 42);
        assert_eq!(rows.len(), 1);
        let col = &rows[0];
        assert_eq!(col.name, "account_id");
        assert_eq!(col.owner_object_id, 3680);
        assert_eq!(col.column_id, 1);
        assert_eq!(col.nulls_flag, 2);
        assert_eq!(col.domain_char, b'N');
        assert_eq!(col.width, 4);
        assert_eq!(col.page_number, 42);
    }

    #[test]
    fn parses_multiple_rows_concatenated() {
        let body = [
            synth_row("amount_amt", None, 1, 100, 7, 2, b'Y', 8),
            synth_row("memo", None, 2, 100, 8, 1, b'Y', 64),
        ]
        .concat();
        let rows = parse(&body, 0);
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0].name, "amount_amt");
        assert_eq!(rows[1].name, "memo");
        assert_eq!(rows[1].width, 64);
    }

    #[test]
    fn handles_underscore_and_digits_in_name() {
        let rows = parse(&synth_row("col_42_xy", None, 0, 5, 1, 0, b'N', 1), 0);
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].name, "col_42_xy");
    }

    #[test]
    fn rejects_bad_marker() {
        let mut body = synth_row("good", None, 0, 1, 1, 0, b'N', 4);
        // The row ends with marker, domain, width, so the marker is third from last.
        let marker_at = body.len() - 3;
        body[marker_at] = 0x00;
        assert!(parse(&body, 0).is_empty());
    }

    #[test]
    fn rejects_non_alpha_domain() {
        let body = synth_row("col", None, 0, 1, 1, 0, 0xFF, 4);
        assert!(parse(&body, 0).is_empty());
    }

    #[test]
    fn name_back_walk_skips_into_garbage_prefix() {
        let body = [
            vec![0xAA, 0xBB, 0xCC, 0xDD],
            synth_row("real_name", None, 0, 1, 1, 0, b'N', 4),
        ]
        .concat();
        let rows = parse(&body, 0);
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].name, "real_name");
    }
}