use std::collections::HashMap;
use std::fs;
use std::io::Write;
use std::path::Path;
use crate::error::QvdError;
use crate::header::NumberFormat;
use crate::value::{Cell, Value};
/// A single named column of a table to be written.
#[derive(Debug, Clone)]
pub struct Column {
/// Field name; written to the header as `<FieldName>` (XML-escaped).
pub name: String,
/// One cell per row, in row order. `plan_column` treats a `None` cell as a
/// null and a `Some(value)` cell as a symbol to be deduplicated.
pub cells: Vec<Cell>,
/// Formatting metadata written to the header's `<NumberFormat>` element.
pub number_format: NumberFormat,
/// Tag strings; each one is written as a `<String>` child of `<Tags>`.
pub tags: Vec<String>,
}
impl Column {
    /// Creates a column from a name and its cells.
    ///
    /// The column starts without tags and with a placeholder number format:
    /// `r#type` is `"UNKNOWN"`, `n_dec` and `use_thou` are `"0"`, and every
    /// other field comes from `NumberFormat::default()`.
    pub fn new(name: impl Into<String>, cells: Vec<Cell>) -> Self {
        let name = name.into();
        let number_format = NumberFormat {
            r#type: String::from("UNKNOWN"),
            n_dec: String::from("0"),
            use_thou: String::from("0"),
            ..NumberFormat::default()
        };
        let tags = Vec::new();
        Self {
            name,
            cells,
            number_format,
            tags,
        }
    }
}
/// A named table made of columns, ready to be encoded.
///
/// Both fields are public, so the equal-length check performed by
/// `WriteTable::new` is bypassed when a value is built or mutated directly.
#[derive(Debug, Clone)]
pub struct WriteTable {
/// Table name; written to the header as `<TableName>` (XML-escaped).
pub name: String,
/// Columns in output order. The first column defines the row count
/// reported by `num_rows`.
pub columns: Vec<Column>,
}
impl WriteTable {
    /// Builds a table after checking that every column has as many cells as
    /// the first one.
    ///
    /// # Errors
    ///
    /// Returns a structure error naming the first column whose cell count
    /// differs from that of the first column. An empty column list is valid.
    pub fn new(name: impl Into<String>, columns: Vec<Column>) -> Result<Self, QvdError> {
        let expected = columns.first().map_or(0, |first| first.cells.len());
        if let Some(mismatch) = columns.iter().find(|c| c.cells.len() != expected) {
            let message = format!(
                "column {:?} has {} cells, expected {}",
                mismatch.name,
                mismatch.cells.len(),
                expected
            );
            return Err(QvdError::structure(message));
        }
        Ok(Self {
            name: name.into(),
            columns,
        })
    }

    /// Number of rows, taken from the first column; `0` when there are no
    /// columns.
    pub fn num_rows(&self) -> usize {
        match self.columns.first() {
            Some(first) => first.cells.len(),
            None => 0,
        }
    }

    /// Encodes the table into an in-memory byte buffer.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `encode`.
    pub fn to_bytes(&self) -> Result<Vec<u8>, QvdError> {
        encode(self)
    }

    /// Encodes the table and writes it to `path`, creating or truncating the
    /// file.
    ///
    /// Encoding happens before the file is opened, so an encoding failure
    /// leaves any existing file untouched.
    ///
    /// # Errors
    ///
    /// Returns an encoding error, or the I/O error from creating or writing
    /// the file.
    pub fn write_to_path(&self, path: impl AsRef<Path>) -> Result<(), QvdError> {
        let encoded = self.to_bytes()?;
        fs::File::create(path.as_ref())?.write_all(&encoded)?;
        Ok(())
    }
}
/// Everything needed to emit one column: its symbol table, the per-row
/// stored values, and where both live in the output.
struct ColumnPlan {
/// Distinct non-null values of the column, in order of first appearance.
symbols: Vec<Value>,
/// One entry per row: the symbol index minus `bias`, or `0` for a null cell.
stored: Vec<u64>,
/// `symbols` serialized by `encode_symbols`.
symbol_bytes: Vec<u8>,
/// Bit position of this column inside a row record. `plan_column` leaves it
/// at 0; `encode` assigns the real value.
bit_offset: u32,
/// Number of bits each stored value occupies inside a row record.
bit_width: u32,
/// `-2` when the column contains at least one null, otherwise `0`.
bias: i32,
/// Byte offset of `symbol_bytes` within the body. `plan_column` leaves it
/// at 0; `encode` assigns the real value.
offset_in_body: u32,
/// Length of `symbol_bytes` in bytes.
length_in_body: u32,
}
/// Builds the symbol table and per-row stored values for one column.
///
/// Non-null cells are deduplicated into `symbols` in order of first
/// appearance. Each row then stores its symbol index minus `bias`; null cells
/// store `0`. `bias` is `-2` when the column has any null and `0` otherwise,
/// so a reader computing `stored + bias` gets `-2` for nulls (presumably the
/// format's NULL marker; confirm against the reader).
///
/// `bit_offset` and `offset_in_body` are left at 0 for the caller to assign.
///
/// # Errors
///
/// Returns a structure error when the number of distinct symbols or the
/// encoded symbol table does not fit in 32 bits, and propagates any error
/// from `encode_symbols`.
fn plan_column(col: &Column) -> Result<ColumnPlan, QvdError> {
    let mut index_of: HashMap<SymbolKey, u32> = HashMap::new();
    let mut symbols: Vec<Value> = Vec::new();
    let mut has_null = false;
    let mut indices: Vec<Option<u32>> = Vec::with_capacity(col.cells.len());

    for cell in &col.cells {
        match cell {
            None => {
                has_null = true;
                indices.push(None);
            }
            Some(v) => {
                // Single hash lookup: reuse the index of a known symbol or
                // register a new one.
                let idx = match index_of.entry(SymbolKey::from(v)) {
                    std::collections::hash_map::Entry::Occupied(slot) => *slot.get(),
                    std::collections::hash_map::Entry::Vacant(slot) => {
                        // Fail loudly instead of wrapping the index.
                        let next = u32::try_from(symbols.len()).map_err(|_| {
                            QvdError::structure(
                                "column has more distinct symbols than fit in 32 bits",
                            )
                        })?;
                        symbols.push(v.clone());
                        *slot.insert(next)
                    }
                };
                indices.push(Some(idx));
            }
        }
    }

    let n_symbols = symbols.len() as u64;
    let bias: i32 = if has_null { -2 } else { 0 };
    // Amount added to a symbol index to obtain its stored value (== -bias).
    let null_shift: u64 = if has_null { 2 } else { 0 };

    // Largest value the bit field must be able to hold. An all-null column
    // still reserves one bit.
    let max_stored: u64 = match (n_symbols, has_null) {
        (0, true) => 1,
        (0, false) => 0,
        (n, _) => (n - 1) + null_shift,
    };
    // Bit length of `max_stored`: 0 for 0, otherwise the smallest `w` with
    // `2^w - 1 >= max_stored`. Cannot overflow, unlike a shift-based search.
    let bit_width: u32 = u64::BITS - max_stored.leading_zeros();

    let stored: Vec<u64> = indices
        .into_iter()
        .map(|slot| slot.map_or(0, |i| u64::from(i) + null_shift))
        .collect();

    let symbol_bytes = encode_symbols(&symbols)?;
    // The header stores this length as a 32-bit value; reject oversized
    // symbol tables rather than writing a truncated length.
    let length_in_body = u32::try_from(symbol_bytes.len())
        .map_err(|_| QvdError::structure("column symbol table exceeds 32-bit length"))?;

    Ok(ColumnPlan {
        symbols,
        stored,
        symbol_bytes,
        bit_offset: 0,
        bit_width,
        bias,
        offset_in_body: 0,
        length_in_body,
    })
}
/// Hashable identity of a `Value`, used by `plan_column` to deduplicate
/// symbols.
///
/// Floating-point numbers are stored as the result of `to_bits()`, so two
/// floats are the same symbol only when their bit patterns are identical.
#[derive(Hash, Eq, PartialEq)]
enum SymbolKey {
/// Key for `Value::Int`.
Int(i32),
/// Key for `Value::Float`; holds the float's bit pattern.
Float(u64),
/// Key for `Value::Str`.
Str(String),
/// Key for `Value::DualInt`: the number and its text.
DualInt(i32, String),
/// Key for `Value::DualFloat`: the number's bit pattern and its text.
DualFloat(u64, String),
}
impl From<&Value> for SymbolKey {
    /// Derives the deduplication key for a value. Floats are keyed by their
    /// bit pattern; text is cloned into the key.
    fn from(v: &Value) -> Self {
        match v {
            Value::Int(number) => Self::Int(*number),
            Value::Float(number) => Self::Float(number.to_bits()),
            Value::Str(text) => Self::Str(text.clone()),
            Value::DualInt(dual) => {
                let number = dual.number;
                let text = dual.text.clone();
                Self::DualInt(number, text)
            }
            Value::DualFloat(dual) => {
                let bits = dual.number.to_bits();
                let text = dual.text.clone();
                Self::DualFloat(bits, text)
            }
        }
    }
}
/// Serializes a symbol table into its on-disk byte form.
///
/// Every symbol is written as a one-byte type tag followed by its payload:
/// `0x01` + little-endian integer, `0x02` + little-endian float,
/// `0x04` + NUL-terminated text, `0x05` + integer + text,
/// `0x06` + float + text.
///
/// # Errors
///
/// Propagates the error from `write_cstring` when a text part cannot be
/// written.
fn encode_symbols(symbols: &[Value]) -> Result<Vec<u8>, QvdError> {
    let mut bytes = Vec::with_capacity(symbols.len() * 8);
    for symbol in symbols {
        // Tag first, payload second.
        let tag: u8 = match symbol {
            Value::Int(_) => 0x01,
            Value::Float(_) => 0x02,
            Value::Str(_) => 0x04,
            Value::DualInt(_) => 0x05,
            Value::DualFloat(_) => 0x06,
        };
        bytes.push(tag);
        match symbol {
            Value::Int(number) => bytes.extend_from_slice(&number.to_le_bytes()),
            Value::Float(number) => bytes.extend_from_slice(&number.to_le_bytes()),
            Value::Str(text) => write_cstring(&mut bytes, text)?,
            Value::DualInt(dual) => {
                bytes.extend_from_slice(&dual.number.to_le_bytes());
                write_cstring(&mut bytes, &dual.text)?;
            }
            Value::DualFloat(dual) => {
                bytes.extend_from_slice(&dual.number.to_le_bytes());
                write_cstring(&mut bytes, &dual.text)?;
            }
        }
    }
    Ok(bytes)
}
/// Appends `s` to `out` as UTF-8 bytes followed by a single `0x00`
/// terminator.
///
/// # Errors
///
/// Returns a structure error, leaving `out` untouched, when `s` itself
/// contains a NUL byte.
fn write_cstring(out: &mut Vec<u8>, s: &str) -> Result<(), QvdError> {
    let raw = s.as_bytes();
    let has_nul = raw.iter().any(|&byte| byte == 0x00);
    if has_nul {
        Err(QvdError::structure(
            "string symbol contains NUL byte, which is reserved as the symbol terminator",
        ))
    } else {
        out.extend_from_slice(raw);
        out.push(0x00);
        Ok(())
    }
}
/// Encodes a table into its complete byte representation.
///
/// The output is, in order: the XML header, a single `0x00` byte, every
/// column's symbol bytes in column order, and then one bit-packed record of
/// `record_byte_size` bytes per row. Each column occupies `bit_width` bits
/// starting at its `bit_offset` within the record, least-significant bits
/// first.
///
/// # Errors
///
/// Returns a structure error when
/// * a column's cell count differs from the table's row count (possible
///   because the table's fields are public),
/// * the row count, bit layout, body layout or row block size does not fit
///   in 32 bits, or
/// * the encoded size cannot be represented as `usize`.
///
/// Errors from `plan_column` are propagated.
fn encode(table: &WriteTable) -> Result<Vec<u8>, QvdError> {
    let row_count = table.num_rows();
    // Re-check the invariant established by `WriteTable::new`; a ragged table
    // would otherwise index out of bounds or silently drop cells below.
    for col in &table.columns {
        if col.cells.len() != row_count {
            return Err(QvdError::structure(format!(
                "column {:?} has {} cells, expected {}",
                col.name,
                col.cells.len(),
                row_count
            )));
        }
    }
    // The header stores the record count as a 32-bit value.
    let n_rows = u32::try_from(row_count)
        .map_err(|_| QvdError::structure("row count exceeds 32-bit range"))?;

    let mut plans: Vec<ColumnPlan> = table
        .columns
        .iter()
        .map(plan_column)
        .collect::<Result<_, _>>()?;

    // Lay the columns out back to back inside a row record.
    let mut bit_cursor: u32 = 0;
    for p in plans.iter_mut() {
        p.bit_offset = bit_cursor;
        bit_cursor = bit_cursor
            .checked_add(p.bit_width)
            .ok_or_else(|| QvdError::structure("bit layout overflow"))?;
    }
    let record_bits = bit_cursor;
    let record_byte_size = record_bits.div_ceil(8);

    // Lay the symbol tables out back to back at the start of the body.
    let mut body_cursor: u32 = 0;
    for p in plans.iter_mut() {
        p.offset_in_body = body_cursor;
        body_cursor = body_cursor
            .checked_add(p.length_in_body)
            .ok_or_else(|| QvdError::structure("body layout overflow"))?;
    }
    let row_block_offset = body_cursor;
    let row_block_length = record_byte_size
        .checked_mul(n_rows)
        .ok_or_else(|| QvdError::structure("row block size overflow"))?;

    let xml = build_xml_header(
        table,
        &plans,
        record_byte_size,
        n_rows,
        row_block_offset,
        row_block_length,
    );

    // Sum in 64 bits: symbol bytes plus row block may exceed u32::MAX even
    // though each part fits.
    let payload_len = u64::from(body_cursor) + u64::from(row_block_length);
    let capacity = usize::try_from(payload_len)
        .ok()
        .and_then(|n| n.checked_add(xml.len() + 1))
        .ok_or_else(|| QvdError::structure("encoded table size exceeds addressable memory"))?;

    let mut out = Vec::with_capacity(capacity);
    out.extend_from_slice(xml.as_bytes());
    out.push(0x00);
    for p in &plans {
        out.extend_from_slice(&p.symbol_bytes);
    }

    let rbs = record_byte_size as usize;
    if rbs > 0 {
        // Zero-width columns contribute nothing to a record.
        let packed: Vec<&ColumnPlan> = plans.iter().filter(|p| p.bit_width > 0).collect();
        if record_bits <= 128 {
            // Fast path: the whole record fits in one u128.
            for row in 0..row_count {
                let mut rec: u128 = 0;
                for p in &packed {
                    // bit_width is in 1..=128 here, so the shift is in 0..=127.
                    let mask = u128::MAX >> (128 - p.bit_width);
                    rec |= (u128::from(p.stored[row]) & mask) << p.bit_offset;
                }
                out.extend_from_slice(&rec.to_le_bytes()[..rbs]);
            }
        } else {
            // General path: pack directly into the zeroed tail of `out`.
            for row in 0..row_count {
                let start = out.len();
                out.resize(start + rbs, 0);
                let record = &mut out[start..];
                for p in &packed {
                    write_bits(record, p.bit_offset, p.bit_width, p.stored[row]);
                }
            }
        }
    }
    Ok(out)
}
/// ORs the low `bit_width` bits of `value` into `buf`, starting at bit
/// position `bit_offset`.
///
/// Bit 0 of a byte is its least-significant bit, and the value's low bits go
/// to the lower positions. Existing bits in `buf` are preserved because the
/// write is an OR.
///
/// # Panics
///
/// Panics when the bit range reaches past the end of `buf`.
fn write_bits(buf: &mut [u8], bit_offset: u32, bit_width: u32, value: u64) {
    let mut cursor = bit_offset;
    let mut bits_left = bit_width;
    let mut pending = value;
    while bits_left != 0 {
        let byte_at = (cursor / 8) as usize;
        let shift = cursor % 8;
        // Take as many bits as fit in the rest of the current byte (1..=8).
        let take = bits_left.min(8 - shift);
        let low_bits = (pending & ((1u64 << take) - 1)) as u8;
        buf[byte_at] |= low_bits << shift;
        pending >>= take;
        cursor += take;
        bits_left -= take;
    }
}
/// Renders the XML header describing the table, its fields and the layout of
/// the body.
///
/// Lines end with `\r\n`. Text values (table name, field names, number-format
/// fields and tags) go through `xml_escape`. An empty number-format `Type`,
/// `nDec` or `UseThou` is written as `UNKNOWN`, `0` and `0` respectively.
/// Columns and plans are paired by position.
fn build_xml_header(
    table: &WriteTable,
    plans: &[ColumnPlan],
    record_byte_size: u32,
    n_rows: u32,
    row_block_offset: u32,
    row_block_length: u32,
) -> String {
    /// Returns `value`, or `fallback` when `value` is empty.
    fn or_fallback<'a>(value: &'a str, fallback: &'a str) -> &'a str {
        if value.is_empty() {
            fallback
        } else {
            value
        }
    }

    let mut xml = String::new();
    xml += "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n";
    xml += "<QvdTableHeader>\r\n";
    xml += &format!(" <TableName>{}</TableName>\r\n", xml_escape(&table.name));

    xml += " <Fields>\r\n";
    for (column, plan) in table.columns.iter().zip(plans) {
        let number_format = &column.number_format;

        xml += " <QvdFieldHeader>\r\n";
        xml += &format!(" <FieldName>{}</FieldName>\r\n", xml_escape(&column.name));
        xml += &format!(" <BitOffset>{}</BitOffset>\r\n", plan.bit_offset);
        xml += &format!(" <BitWidth>{}</BitWidth>\r\n", plan.bit_width);
        xml += &format!(" <Bias>{}</Bias>\r\n", plan.bias);

        xml += " <NumberFormat>\r\n";
        xml += &format!(
            " <Type>{}</Type>\r\n",
            xml_escape(or_fallback(&number_format.r#type, "UNKNOWN"))
        );
        xml += &format!(
            " <nDec>{}</nDec>\r\n",
            xml_escape(or_fallback(&number_format.n_dec, "0"))
        );
        xml += &format!(
            " <UseThou>{}</UseThou>\r\n",
            xml_escape(or_fallback(&number_format.use_thou, "0"))
        );
        xml += &format!(" <Fmt>{}</Fmt>\r\n", xml_escape(&number_format.fmt));
        xml += &format!(" <Dec>{}</Dec>\r\n", xml_escape(&number_format.dec));
        xml += &format!(" <Thou>{}</Thou>\r\n", xml_escape(&number_format.thou));
        xml += " </NumberFormat>\r\n";

        xml += &format!(" <NoOfSymbols>{}</NoOfSymbols>\r\n", plan.symbols.len());
        xml += &format!(" <Offset>{}</Offset>\r\n", plan.offset_in_body);
        xml += &format!(" <Length>{}</Length>\r\n", plan.length_in_body);

        if column.tags.is_empty() {
            xml += " <Tags/>\r\n";
        } else {
            xml += " <Tags>\r\n";
            for tag in &column.tags {
                xml += &format!(" <String>{}</String>\r\n", xml_escape(tag));
            }
            xml += " </Tags>\r\n";
        }
        xml += " </QvdFieldHeader>\r\n";
    }
    xml += " </Fields>\r\n";

    xml += " <Compression></Compression>\r\n";
    xml += &format!(" <RecordByteSize>{}</RecordByteSize>\r\n", record_byte_size);
    xml += &format!(" <NoOfRecords>{}</NoOfRecords>\r\n", n_rows);
    xml += &format!(" <Offset>{}</Offset>\r\n", row_block_offset);
    xml += &format!(" <Length>{}</Length>\r\n", row_block_length);
    xml += "</QvdTableHeader>\r\n";
    xml
}
/// Escapes text for safe inclusion in XML element content or attribute
/// values.
///
/// The five characters with predefined XML entities are replaced:
/// `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`,
/// `'` → `&apos;`. Every other character is copied unchanged. The input is
/// not inspected for existing entities, so an `&` that already starts one is
/// escaped again.
fn xml_escape(s: &str) -> String {
    // At least as long as the input; grows only when something is escaped.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            _ => out.push(c),
        }
    }
    out
}