use std::collections::BTreeMap;
use opensqlany::{ApModel, Page, PageStore, PageType, SlottedPage};
use crate::bv_recovery::{deobfuscate_with_bv, recover_bv_qb_data};
use crate::page_attribution::PageAttribution;
use crate::systable::{SysTableEntry, iter_systable_entries};
/// Maximum number of leading row bytes kept in a [`RowSignature`].
pub const SIG_LEN: usize = 12;

/// The leading bytes of a row (at most [`SIG_LEN`] of them), usable as an ordered,
/// hashable map key.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RowSignature(Vec<u8>);

impl RowSignature {
    /// Builds a signature from the first [`SIG_LEN`] bytes of `row`.
    ///
    /// A row shorter than [`SIG_LEN`] is kept whole; an empty row yields an empty
    /// signature.
    pub fn from_row(row: &[u8]) -> Self {
        let prefix: Vec<u8> = row.iter().copied().take(SIG_LEN).collect();
        Self(prefix)
    }

    /// Returns the signature bytes.
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_slice()
    }

    /// Renders the signature as lowercase two-digit hex bytes separated by single
    /// spaces, e.g. `"00 0e ff"`. An empty signature renders as an empty string.
    pub fn to_hex(&self) -> String {
        // Two digits plus a separator per byte is a tight upper bound on the length.
        let mut hex = String::with_capacity(self.0.len() * 3);
        for (index, byte) in self.0.iter().enumerate() {
            if index != 0 {
                hex.push(' ');
            }
            hex.push_str(&format!("{byte:02x}"));
        }
        hex
    }
}
/// Lookup from a page's first-row signature to the catalogue entry it identifies.
///
/// Populated by `ContentAttribution::build`, which extracts one signature per
/// catalogue entry from that entry's `data_root_page`.
#[derive(Debug, Clone)]
pub struct ContentAttribution {
/// Signatures that were produced by exactly one catalogue entry, mapped to that entry.
unique: BTreeMap<RowSignature, SysTableEntry>,
/// Number of distinct signatures that were produced by more than one catalogue entry
/// (counted per signature, not per entry); those signatures are not stored.
ambiguous_count: usize,
/// Number of catalogue entries that had no `data_root_page` or whose root page
/// yielded no signature.
skipped_count: usize,
}
/// Per-page tally of how position-based and content-based attribution compare,
/// as filled in by `ContentAttribution::compare`.
#[derive(Debug, Default, Clone, Copy)]
pub struct AttributionAgreement {
    /// Pages for which both methods returned an entry and the entry names are equal.
    pub agree: u64,
    /// Pages for which both methods returned an entry but the entry names differ.
    pub disagree: u64,
    /// Pages attributed by position only.
    pub only_position: u64,
    /// Pages attributed by content only.
    pub only_content: u64,
    /// Pages attributed by neither method.
    pub neither: u64,
}

impl AttributionAgreement {
    /// Returns the sum of all five counters.
    pub fn total(&self) -> u64 {
        [
            self.agree,
            self.disagree,
            self.only_position,
            self.only_content,
            self.neither,
        ]
        .iter()
        .sum()
    }
}
impl ContentAttribution {
    /// Builds the signature lookup from the entries yielded by `iter_systable_entries`.
    ///
    /// For each entry that has a `data_root_page`, the signature of that page is
    /// extracted with `extract_signature`. Entries without a root page, or whose root
    /// page yields no signature, are counted as skipped. A signature produced by
    /// exactly one entry is stored; a signature shared by several entries is counted
    /// once as ambiguous and dropped.
    pub fn build(store: &PageStore, model: &ApModel) -> Self {
        // Gather every catalogue entry first, then probe the root pages.
        let catalogue: Vec<SysTableEntry> = iter_systable_entries(store, model).collect();

        let mut by_signature: BTreeMap<RowSignature, Vec<SysTableEntry>> = BTreeMap::new();
        let mut skipped_count = 0usize;
        for table in catalogue {
            let signature = match table.data_root_page {
                Some(root) => extract_signature(store, model, root as u64),
                None => None,
            };
            match signature {
                Some(sig) => by_signature.entry(sig).or_default().push(table),
                None => skipped_count += 1,
            }
        }

        let mut unique: BTreeMap<RowSignature, SysTableEntry> = BTreeMap::new();
        let mut ambiguous_count = 0usize;
        for (sig, owners) in by_signature {
            // Only a signature with exactly one owning entry is usable as a key.
            let mut owners = owners.into_iter();
            match (owners.next(), owners.next()) {
                (Some(only), None) => {
                    unique.insert(sig, only);
                }
                _ => ambiguous_count += 1,
            }
        }

        Self {
            unique,
            ambiguous_count,
            skipped_count,
        }
    }

    /// Returns the entry whose unique signature matches the signature extracted from
    /// page `page_number`, or `None` when no signature can be extracted or the
    /// signature is not in the lookup.
    pub fn attribute(
        &self,
        store: &PageStore,
        model: &ApModel,
        page_number: u64,
    ) -> Option<&SysTableEntry> {
        extract_signature(store, model, page_number).and_then(|sig| self.unique.get(&sig))
    }

    /// Number of unique signatures in the lookup.
    pub fn len(&self) -> usize {
        self.unique.len()
    }

    /// Returns `true` when the lookup holds no unique signature.
    pub fn is_empty(&self) -> bool {
        self.unique.is_empty()
    }

    /// Number of signatures discarded by `build` because several entries shared them.
    pub fn ambiguous_count(&self) -> usize {
        self.ambiguous_count
    }

    /// Number of entries for which `build` could not obtain a signature.
    pub fn skipped_count(&self) -> usize {
        self.skipped_count
    }

    /// Attributes every page in `pages` with both `position` and this content lookup,
    /// and tallies how the two results relate.
    ///
    /// Results are compared by entry name. Each page increments exactly one counter
    /// of the returned [`AttributionAgreement`].
    pub fn compare<I>(
        &self,
        store: &PageStore,
        model: &ApModel,
        position: &PageAttribution,
        pages: I,
    ) -> AttributionAgreement
    where
        I: IntoIterator<Item = u64>,
    {
        let mut tally = AttributionAgreement::default();
        for page_number in pages {
            let by_position = position.attribute(page_number).map(|e| e.name.as_str());
            let by_content = self
                .attribute(store, model, page_number)
                .map(|e| e.name.as_str());

            // Pick the single counter this page belongs to, then bump it.
            let counter = match (by_position, by_content) {
                (None, None) => &mut tally.neither,
                (Some(_), None) => &mut tally.only_position,
                (None, Some(_)) => &mut tally.only_content,
                (Some(pos), Some(con)) => {
                    if pos == con {
                        &mut tally.agree
                    } else {
                        &mut tally.disagree
                    }
                }
            };
            *counter += 1;
        }
        tally
    }
}
/// Extracts the signature of the first row stored on page `page_number`.
///
/// Returns `None` when the page cannot be loaded from `store`, when its trailer does
/// not report `PageType::Extent`, when the parsed slotted page has no directory, or
/// when it contains no rows.
fn extract_signature(store: &PageStore, model: &ApModel, page_number: u64) -> Option<RowSignature> {
    let page = store.page(page_number).ok()?;
    if page.trailer().page_type() != PageType::Extent {
        return None;
    }

    // Prefer the value recovered from the page itself; otherwise fall back to the
    // model's store-based deobfuscation.
    let raw = page.bytes();
    let plain = match recover_bv_qb_data(page_number, raw) {
        Some(bv) => deobfuscate_with_bv(raw, page_number, bv),
        None => model.deobfuscate_with_store(raw, page_number, store),
    };

    let slotted = SlottedPage::parse(Page::from_bytes(page_number, &plain));
    // Bail out when the parsed page carries no slot directory.
    slotted.directory.as_ref()?;

    let rows = slotted.row_bytes();
    rows.first()
        .map(|(_slot, leading_row)| RowSignature::from_row(leading_row))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A row longer than `SIG_LEN` is cut down to exactly `SIG_LEN` bytes.
    #[test]
    fn signature_truncates_long_row() {
        let signature = RowSignature::from_row(&[1u8; 64]);
        let bytes = signature.as_bytes();
        assert_eq!(bytes.len(), SIG_LEN);
        assert!(bytes.iter().all(|&byte| byte == 1));
    }

    /// A row shorter than `SIG_LEN` is kept unchanged.
    #[test]
    fn signature_handles_short_row() {
        let short_row = [0xAB_u8, 0xCD, 0xEF];
        let signature = RowSignature::from_row(&short_row);
        assert_eq!(signature.as_bytes(), &short_row[..]);
    }

    /// Hex rendering is lowercase, two digits per byte, space separated.
    #[test]
    fn signature_hex_format() {
        let rendered = RowSignature::from_row(&[0x00, 0x0E, 0xFF]).to_hex();
        assert_eq!(rendered, "00 0e ff");
    }

    /// `total` adds up all five counters.
    #[test]
    fn agreement_total_sums_components() {
        let tally = AttributionAgreement {
            agree: 5,
            disagree: 2,
            only_position: 1,
            only_content: 3,
            neither: 4,
        };
        assert_eq!(tally.total(), 15);
    }
}