1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
/*
* SPDX-FileCopyrightText: 2023 Inria
* SPDX-FileCopyrightText: 2023 Sebastiano Vigna
*
* SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
*/
/*!
Traits and implementations to write named field during serialization.
[`SerializeInner::_serialize_inner`] writes on a [`WriteWithNames`], rather
than on a [`WriteWithPos`], with the purpose of easily recording write
events happening during a serialization.
*/
use super::*;
/// Trait extending [`WriteWithPos`] with methods providing
/// alignment, serialization of named data, and writing of byte slices
/// of zero-copy types.
///
/// The purpose of this trait is that of interposing between [`SerializeInner`]
/// and the underlying [`WriteWithPos`] a layer in which serialization operations
/// can be easily intercepted and recorded. In particular, serialization methods
/// must use the methods of this trait if they want to record the schema of the
/// serialized data; this is true (maybe counterintuitively) even of ancillary
/// data such as tags and slice lengths: see [`helpers`] or the
/// [implementation of `Option`](impls::prim) for examples.
/// All methods have a default
/// implementation that must be replicated in other implementations.
///
/// There are two implementations of [`WriteWithNames`]: [`WriterWithPos`],
/// which uses the default implementation, and [`SchemaWriter`],
/// which additionally records a [`Schema`] of the serialized data.
pub trait WriteWithNames: WriteWithPos + Sized {
/// Add some zero padding so that `self.pos() % V:max_size_of() == 0.`
///
/// Other implementations must write the same number of zeros.
fn align<V: MaxSizeOf>(&mut self) -> Result<()> {
let padding = pad_align_to(self.pos(), V::max_size_of());
for _ in 0..padding {
self.write_all(&[0])?;
}
Ok(())
}
/// Write a value with an associated name.
///
/// The default implementation simply delegates to [`SerializeInner::_serialize_inner`].
/// Other implementations might use the name information (e.g., [`SchemaWriter`]),
/// but they must in the end delegate to [`SerializeInner::_serialize_inner`].
fn write<V: SerializeInner>(&mut self, _field_name: &str, value: &V) -> Result<()> {
value._serialize_inner(self)
}
/// Write the memory representation of a (slice of a) zero-copy type.
///
/// The default implementation simply delegates to [`WriteNoStd::write_all`].
/// Other implementations might use the type information in `V` (e.g., [`SchemaWriter`]),
/// but they must in the end delegate to [`WriteNoStd::write_all`].
fn write_bytes<V: SerializeInner + ZeroCopy>(&mut self, value: &[u8]) -> Result<()> {
self.write_all(value)
}
}
impl<F: WriteNoStd> WriteWithNames for WriterWithPos<'_, F> {}
/// Information about data written during serialization, either fields or
/// ancillary data such as option tags and slice lengths.
#[derive(Debug, Clone)]
pub struct SchemaRow {
/// Name of the piece of data.
pub field: String,
/// Type of the piece of data.
pub ty: String,
/// Offset from the start of the file.
pub offset: usize,
/// Length in bytes of the piece of data.
pub size: usize,
/// The alignment needed by the piece of data, zero if not applicable
/// (e.g., primitive fields, ancillary data, or structures).
pub align: usize,
}
#[derive(Default, Debug, Clone)]
/// A vector containing all the fields written during serialization, including
/// ancillary data such as slice lengths and [`Option`] tags.
pub struct Schema(pub Vec<SchemaRow>);
impl Schema {
/// Return a CSV representation of the schema, including data.
///
/// WARNING: the size of the CSV will be larger than the size of the
/// serialized file, so it is not a good idea to call this method
/// on big structures.
pub fn debug(&self, data: &[u8]) -> String {
let mut result = "field,offset,align,size,ty,bytes\n".to_string();
for i in 0..self.0.len().saturating_sub(1) {
let row = &self.0[i];
// if it's a composed type, don't print the bytes
if row.offset == self.0[i + 1].offset {
result.push_str(&format!(
"{},{},{},{},{},\n",
row.field, row.offset, row.align, row.size, row.ty,
));
} else {
result.push_str(&format!(
"{},{},{},{},{},{:02x?}\n",
row.field,
row.offset,
row.align,
row.size,
row.ty,
&data[row.offset..row.offset + row.size],
));
}
}
// the last field can't be a composed type by definition
if let Some(row) = self.0.last() {
result.push_str(&format!(
"{},{},{},{},{},{:02x?}\n",
row.field,
row.offset,
row.align,
row.size,
row.ty,
&data[row.offset..row.offset + row.size],
));
}
result
}
/// Return a CSV representation of the schema, excluding data.
pub fn to_csv(&self) -> String {
let mut result = "field,offset,align,size,ty\n".to_string();
for row in &self.0 {
result.push_str(&format!(
"{},{},{},{},{}\n",
row.field, row.offset, row.align, row.size, row.ty
));
}
result
}
}
/// A [`WriteWithNames`] that keeps track of the data written on an underlying
/// [`WriteWithPos`] in a [`Schema`].
pub struct SchemaWriter<'a, W> {
/// The schema so far.
pub schema: Schema,
/// A recursively-built sequence of previous names.
path: Vec<String>,
/// What we actually write on.
writer: &'a mut W,
}
impl<'a, W: WriteWithPos> SchemaWriter<'a, W> {
#[inline(always)]
/// Create a new empty [`SchemaWriter`] on top of a generic writer `W`.
pub fn new(backend: &'a mut W) -> Self {
Self {
schema: Default::default(),
path: vec![],
writer: backend,
}
}
}
impl<W: WriteNoStd> WriteNoStd for SchemaWriter<'_, W> {
fn write_all(&mut self, buf: &[u8]) -> ser::Result<()> {
self.writer.write_all(buf)
}
fn flush(&mut self) -> ser::Result<()> {
self.writer.flush()
}
}
impl<W: WriteWithPos> WriteWithPos for SchemaWriter<'_, W> {
fn pos(&self) -> usize {
self.writer.pos()
}
}
/// WARNING: these implementations must be kept in sync with the ones
/// in the default implementation of [`WriteWithNames`].
impl<W: WriteWithPos> WriteWithNames for SchemaWriter<'_, W> {
#[inline(always)]
fn align<T: MaxSizeOf>(&mut self) -> Result<()> {
let padding = pad_align_to(self.pos(), T::max_size_of());
if padding != 0 {
self.schema.0.push(SchemaRow {
field: "PADDING".into(),
ty: format!("[u8; {}]", padding),
offset: self.pos(),
size: padding,
align: 1,
});
for _ in 0..padding {
self.write_all(&[0])?;
}
}
Ok(())
}
#[inline(always)]
fn write<V: SerializeInner>(&mut self, field_name: &str, value: &V) -> Result<()> {
// prepare a row with the field name and the type
self.path.push(field_name.into());
let pos = self.pos();
let len = self.schema.0.len();
value._serialize_inner(self)?;
// This is slightly inefficient because we have to shift
// the whole vector, but it's not a big deal and it keeps
// the schema in the correct order.
self.schema.0.insert(
len,
SchemaRow {
field: self.path.join("."),
ty: core::any::type_name::<V>().to_string(),
offset: pos,
align: 0,
size: self.pos() - pos,
},
);
self.path.pop();
Ok(())
}
#[inline(always)]
fn write_bytes<V: SerializeInner + ZeroCopy>(&mut self, value: &[u8]) -> Result<()> {
self.path.push("zero".to_string());
// Note that we are writing the schema row of the field before
// having written its content.
self.schema.0.push(SchemaRow {
field: self.path.join("."),
ty: core::any::type_name::<V>().to_string(),
offset: self.pos(),
size: value.len(),
align: V::max_size_of(),
});
self.path.pop();
self.write_all(value)
}
}