use crate::error::{Result, YxdbError};
use crate::field::{FieldMeta, FieldType};
/// Values decoded from the fixed-size header at the start of a YXDB file.
///
/// Every field is stored in the header as a little-endian integer at a fixed
/// byte range; the ranges noted below are the ones [`YxdbHeader::parse`] reads.
///
/// The type is plain data, so it can be cloned and compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct YxdbHeader {
    /// Record count declared by the header (bytes 104..112).
    pub num_records: u64,
    /// Size of the metadata section as declared by the header (bytes 80..84).
    /// NOTE(review): the unit (bytes vs. UTF-16 code units) is not visible in
    /// this file — confirm against the reader that consumes it.
    pub meta_info_size: u32,
    /// Compression version marker (bytes 112..116).
    pub compression_version: i32,
    /// Position of the record block index (bytes 96..104).
    pub record_block_index_pos: i64,
    /// File type identifier (bytes 64..68); compared against [`ID_WRIGLEYDB`]
    /// when checking for a spatial index.
    pub file_id: u32,
    /// Position of the spatial index (bytes 88..96); a value of zero or less
    /// is treated as "no spatial index" by [`YxdbHeader::has_spatial_index`].
    pub spatial_index_pos: i64,
}
/// Length in bytes of the fixed-size header buffer accepted by [`YxdbHeader::parse`].
pub const HEADER_SIZE: usize = 512;
/// Byte signature that must appear at offset 0 of the header.
pub const MAGIC: &[u8] = b"Alteryx Database File";
/// Largest record count the header parser accepts; anything above this is
/// reported as an invalid (likely corrupt) file.
const MAX_RECORDS: u64 = 10_000_000_000;
/// Largest number of `<Field>` entries [`parse_meta_xml`] accepts before
/// rejecting the metadata.
pub const MAX_FIELDS: usize = 100_000;
/// `file_id` value that [`YxdbHeader::has_spatial_index`] requires before it
/// will report a spatial index.
pub const ID_WRIGLEYDB: u32 = 0x00440205;
/// `file_id` value for files without a spatial index (per the constant's name;
/// it is not otherwise interpreted in this file).
pub const ID_WRIGLEYDB_NO_SPATIAL_INDEX: u32 = 0x00440204;
impl YxdbHeader {
    /// Reports whether this header describes a file that carries a spatial index.
    ///
    /// This is true only when `file_id` equals [`ID_WRIGLEYDB`] and
    /// `spatial_index_pos` is strictly positive.
    pub fn has_spatial_index(&self) -> bool {
        if self.file_id != ID_WRIGLEYDB {
            return false;
        }
        self.spatial_index_pos > 0
    }

    /// Decodes a header from its raw fixed-size byte form.
    ///
    /// All numeric fields are read as little-endian integers from fixed offsets.
    ///
    /// # Errors
    ///
    /// Returns `YxdbError::InvalidFile` when the buffer does not begin with
    /// [`MAGIC`], or when the declared record count exceeds the internal
    /// `MAX_RECORDS` limit.
    pub fn parse(buf: &[u8; HEADER_SIZE]) -> Result<Self> {
        // Copies the `N` bytes beginning at `start` out of the header. Every
        // call site uses a constant range that lies inside the buffer.
        fn bytes_at<const N: usize>(buf: &[u8; HEADER_SIZE], start: usize) -> [u8; N] {
            let mut out = [0u8; N];
            out.copy_from_slice(&buf[start..start + N]);
            out
        }

        if !buf.starts_with(MAGIC) {
            return Err(YxdbError::InvalidFile(
                "file does not start with 'Alteryx Database File'".into(),
            ));
        }

        // Fields are decoded in ascending offset order.
        let file_id = u32::from_le_bytes(bytes_at(buf, 64));
        let meta_info_size = u32::from_le_bytes(bytes_at(buf, 80));
        let spatial_index_pos = i64::from_le_bytes(bytes_at(buf, 88));
        let record_block_index_pos = i64::from_le_bytes(bytes_at(buf, 96));
        let num_records = u64::from_le_bytes(bytes_at(buf, 104));
        let compression_version = i32::from_le_bytes(bytes_at(buf, 112));

        // An implausibly large count is treated as a sign of corruption.
        if num_records > MAX_RECORDS {
            return Err(YxdbError::InvalidFile(format!(
                "header record count {num_records} exceeds limit of {MAX_RECORDS} (corrupt file?)",
            )));
        }

        Ok(Self {
            num_records,
            meta_info_size,
            compression_version,
            record_block_index_pos,
            file_id,
            spatial_index_pos,
        })
    }
}
/// Parses the XML metadata and returns the fields declared inside `<RecordInfo>`.
///
/// Only `<Field>` elements seen between a `<RecordInfo>` start tag and its end
/// tag are collected; `<Field>` elements elsewhere in the document are ignored.
/// For each field the `name`, `type`, `size` and `scale` attributes are read.
/// A missing or unparsable `size`/`scale` falls back to `0`, and attributes the
/// XML reader cannot parse are skipped.
///
/// Each returned [`FieldMeta`] carries the running offset of the field, computed
/// by summing `FieldType::fixed_bytes(size)` over the preceding fields.
///
/// # Errors
///
/// * `YxdbError::XmlError` if the XML reader reports an error.
/// * `YxdbError::UnsupportedFieldType` if a field's `type` attribute is not
///   recognised by `FieldType::from_xml_str` (a missing `type` is seen as `""`).
/// * `YxdbError::InvalidFile` if more than [`MAX_FIELDS`] fields are declared,
///   if the accumulated field offset would overflow `usize`, or if no field is
///   found at all.
pub fn parse_meta_xml(xml: &str) -> Result<Vec<FieldMeta>> {
    use quick_xml::events::Event;
    use quick_xml::Reader;

    let mut reader = Reader::from_str(xml);
    let mut fields = Vec::new();
    // Running offset of the next field within the fixed-size part of a record.
    let mut offset: usize = 0;
    let mut in_record_info = false;

    loop {
        match reader.read_event() {
            Ok(Event::Start(ref e)) if e.name().as_ref() == b"RecordInfo" => {
                in_record_info = true;
            }
            Ok(Event::End(ref e)) if e.name().as_ref() == b"RecordInfo" => {
                in_record_info = false;
            }
            // Accept both `<Field ... />` and `<Field ...>...</Field>` forms.
            Ok(Event::Empty(ref e)) | Ok(Event::Start(ref e))
                if e.name().as_ref() == b"Field" && in_record_info =>
            {
                let mut name = String::new();
                let mut type_str = String::new();
                let mut size: usize = 0;
                let mut scale: usize = 0;

                // Attributes the reader cannot parse are skipped (best effort).
                for attr in e.attributes().flatten() {
                    match attr.key.as_ref() {
                        b"name" => {
                            // Prefer the unescaped value; fall back to the raw
                            // bytes when unescaping fails.
                            name = match attr.unescape_value() {
                                Ok(value) => value.into_owned(),
                                Err(_) => String::from_utf8_lossy(&attr.value).into_owned(),
                            };
                        }
                        b"type" => {
                            type_str = String::from_utf8_lossy(&attr.value).into_owned();
                        }
                        b"size" => {
                            size = String::from_utf8_lossy(&attr.value).parse().unwrap_or(0);
                        }
                        b"scale" => {
                            scale = String::from_utf8_lossy(&attr.value).parse().unwrap_or(0);
                        }
                        _ => {}
                    }
                }

                let field_type = FieldType::from_xml_str(&type_str)
                    .ok_or_else(|| YxdbError::UnsupportedFieldType(type_str.clone()))?;

                // `size` comes straight from the file, so the running offset is
                // advanced with a checked add: a hostile or corrupt size must
                // produce an error, not a debug-build panic or a silently
                // wrapped offset in release builds.
                let current_offset = offset;
                offset = offset
                    .checked_add(field_type.fixed_bytes(size))
                    .ok_or_else(|| {
                        YxdbError::InvalidFile(format!(
                            "record layout overflows at field '{name}' (corrupt metadata?)"
                        ))
                    })?;

                fields.push(FieldMeta {
                    name,
                    field_type,
                    size,
                    scale,
                    offset: current_offset,
                });

                if fields.len() > MAX_FIELDS {
                    return Err(YxdbError::InvalidFile(format!(
                        "field count exceeds limit of {MAX_FIELDS}"
                    )));
                }
            }
            Ok(Event::Eof) => break,
            Err(e) => {
                return Err(YxdbError::XmlError(format!(
                    "error parsing XML metadata: {e}"
                )));
            }
            _ => {}
        }
    }

    if fields.is_empty() {
        return Err(YxdbError::InvalidFile(
            "no fields found in XML metadata".into(),
        ));
    }
    Ok(fields)
}
/// Decodes a little-endian UTF-16 byte sequence into a `String`.
///
/// The input is consumed two bytes at a time; a trailing unpaired byte is
/// ignored. Code units that do not form valid UTF-16 (for example an unpaired
/// surrogate) are replaced with U+FFFD.
pub fn decode_utf16_le(bytes: &[u8]) -> String {
    let units = bytes
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]));
    char::decode_utf16(units)
        .map(|decoded| decoded.unwrap_or(char::REPLACEMENT_CHARACTER))
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a zero-filled header buffer that starts with the magic signature.
    fn signed_header() -> [u8; HEADER_SIZE] {
        let mut raw = [0u8; HEADER_SIZE];
        raw[..MAGIC.len()].copy_from_slice(MAGIC);
        raw
    }

    /// Writes `bytes` into the header buffer starting at offset `at`.
    fn put(raw: &mut [u8; HEADER_SIZE], at: usize, bytes: &[u8]) {
        raw[at..at + bytes.len()].copy_from_slice(bytes);
    }

    // ---- header parsing -------------------------------------------------

    #[test]
    fn header_magic_check() {
        // Without the signature the header is rejected...
        let blank = [0u8; HEADER_SIZE];
        assert!(YxdbHeader::parse(&blank).is_err());

        // ...and with it the otherwise empty header is accepted.
        let signed = signed_header();
        assert!(YxdbHeader::parse(&signed).is_ok());
    }

    #[test]
    fn header_round_trip_fields() {
        let mut raw = signed_header();
        put(&mut raw, 80, &1000u32.to_le_bytes());
        put(&mut raw, 96, &999999i64.to_le_bytes());
        put(&mut raw, 104, &50000u64.to_le_bytes());
        put(&mut raw, 112, &1i32.to_le_bytes());

        let parsed = YxdbHeader::parse(&raw).unwrap();
        assert_eq!(parsed.meta_info_size, 1000);
        assert_eq!(parsed.record_block_index_pos, 999999);
        assert_eq!(parsed.num_records, 50000);
        assert_eq!(parsed.compression_version, 1);
    }

    #[test]
    fn header_zero_records() {
        let parsed = YxdbHeader::parse(&signed_header()).unwrap();
        assert_eq!(parsed.num_records, 0);
        assert_eq!(parsed.compression_version, 0);
    }

    #[test]
    fn header_truncated_magic() {
        // Only the first ten signature bytes are present.
        let mut raw = [0u8; HEADER_SIZE];
        raw[..10].copy_from_slice(&MAGIC[..10]);
        assert!(YxdbHeader::parse(&raw).is_err());
    }

    #[test]
    fn header_negative_record_count_rejected() {
        // -1 as i64 reads back as u64::MAX, which is above the record limit.
        let mut raw = signed_header();
        put(&mut raw, 104, &(-1i64).to_le_bytes());

        let err = YxdbHeader::parse(&raw).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("exceeds limit"), "unexpected error: {msg}");
    }

    #[test]
    fn header_file_id_and_spatial_index() {
        let mut raw = signed_header();
        put(&mut raw, 64, &ID_WRIGLEYDB_NO_SPATIAL_INDEX.to_le_bytes());

        let parsed = YxdbHeader::parse(&raw).unwrap();
        assert_eq!(parsed.file_id, ID_WRIGLEYDB_NO_SPATIAL_INDEX);
        assert!(!parsed.has_spatial_index());
        assert_eq!(parsed.spatial_index_pos, 0);
    }

    #[test]
    fn header_with_spatial_index() {
        let mut raw = signed_header();
        put(&mut raw, 64, &ID_WRIGLEYDB.to_le_bytes());
        put(&mut raw, 88, &12345i64.to_le_bytes());

        let parsed = YxdbHeader::parse(&raw).unwrap();
        assert_eq!(parsed.file_id, ID_WRIGLEYDB);
        assert!(parsed.has_spatial_index());
        assert_eq!(parsed.spatial_index_pos, 12345);
    }

    #[test]
    fn header_spatial_index_zero_pos_means_no_index() {
        let mut raw = signed_header();
        put(&mut raw, 64, &ID_WRIGLEYDB.to_le_bytes());
        put(&mut raw, 88, &0i64.to_le_bytes());

        let parsed = YxdbHeader::parse(&raw).unwrap();
        assert!(!parsed.has_spatial_index());
    }

    // ---- XML metadata ---------------------------------------------------

    #[test]
    fn parse_simple_xml() {
        let xml = r#"<RecordInfo>
<Field name="ID" type="Int32" />
<Field name="Name" type="V_WString" size="256" />
<Field name="Value" type="Double" />
</RecordInfo>"#;

        let fields = parse_meta_xml(xml).unwrap();
        assert_eq!(fields.len(), 3);

        let (id, name, value) = (&fields[0], &fields[1], &fields[2]);
        assert_eq!(id.name, "ID");
        assert_eq!(id.field_type, FieldType::Int32);
        assert_eq!(id.offset, 0);
        assert_eq!(name.offset, 5);
        assert_eq!(name.field_type, FieldType::VWString);
        assert_eq!(value.offset, 9);
        assert_eq!(value.field_type, FieldType::Double);
    }

    #[test]
    fn parse_xml_all_types() {
        let xml = r#"<RecordInfo>
<Field name="a" type="Bool" />
<Field name="b" type="Byte" />
<Field name="c" type="Int16" />
<Field name="d" type="Int32" />
<Field name="e" type="Int64" />
<Field name="f" type="Float" />
<Field name="g" type="Double" />
<Field name="h" type="FixedDecimal" size="19" scale="4" />
<Field name="i" type="String" size="10" />
<Field name="j" type="WString" size="10" />
<Field name="k" type="V_String" size="256" />
<Field name="l" type="V_WString" size="256" />
<Field name="m" type="Date" />
<Field name="n" type="Time" />
<Field name="o" type="DateTime" />
<Field name="p" type="Blob" size="1000" />
<Field name="q" type="SpatialObj" size="1000" />
</RecordInfo>"#;

        let fields = parse_meta_xml(xml).unwrap();
        assert_eq!(fields.len(), 17);
        assert_eq!(fields[0].field_type, FieldType::Bool);
        assert_eq!(fields[7].field_type, FieldType::FixedDecimal);
        assert_eq!(fields[7].scale, 4);
        assert_eq!(fields[16].field_type, FieldType::SpatialObj);
    }

    #[test]
    fn parse_xml_empty_record_info() {
        // A RecordInfo element without any fields is an error.
        let xml = r#"<RecordInfo></RecordInfo>"#;
        assert!(parse_meta_xml(xml).is_err());
    }

    #[test]
    fn parse_xml_unsupported_type() {
        let xml = r#"<RecordInfo>
<Field name="x" type="UnknownType" />
</RecordInfo>"#;
        assert!(parse_meta_xml(xml).is_err());
    }

    #[test]
    fn parse_xml_nested_in_outer_element() {
        let xml = r#"<MetaInfo>
<RecordInfo>
<Field name="x" type="Int32" />
</RecordInfo>
</MetaInfo>"#;

        let fields = parse_meta_xml(xml).unwrap();
        assert_eq!(fields.len(), 1);
        assert_eq!(fields[0].name, "x");
    }

    #[test]
    fn parse_xml_field_outside_record_info_ignored() {
        let xml = r#"<Root>
<Field name="ignored" type="Int32" />
<RecordInfo>
<Field name="real" type="Int32" />
</RecordInfo>
<Field name="also_ignored" type="Int32" />
</Root>"#;

        let fields = parse_meta_xml(xml).unwrap();
        assert_eq!(fields.len(), 1);
        assert_eq!(fields[0].name, "real");
    }

    // ---- UTF-16 decoding ------------------------------------------------

    #[test]
    fn decode_utf16le_ascii() {
        let bytes = [0x48, 0x00, 0x69, 0x00];
        assert_eq!(decode_utf16_le(&bytes), "Hi");
    }

    #[test]
    fn decode_utf16le_non_ascii() {
        let bytes = [0xFC, 0x00];
        assert_eq!(decode_utf16_le(&bytes), "ü");
    }

    #[test]
    fn decode_utf16le_empty() {
        assert_eq!(decode_utf16_le(&[]), "");
    }

    #[test]
    fn decode_utf16le_cjk() {
        let bytes = [0xE5, 0x65];
        assert_eq!(decode_utf16_le(&bytes), "日");
    }
}