// Generic records: a self-describing header schema and the typed records decoded with it.
use crate::error::Result;
use crate::reader::BinaryReader;
use crate::types::GenericType;
use std::io::{Cursor, Read, Seek};
/// A field descriptor within a GenericDataHeader.
///
/// One descriptor is parsed per schema field by `GenericDataHeader::try_read`
/// and later drives how `GenericRecord::read` decodes that field.
#[derive(Debug, Clone)]
pub struct GenericDataDescriptor {
/// Value type of the field, decoded from the `u32` type code in the header.
pub field_type: GenericType,
/// Length word read from the header. It is only consulted for string
/// fields: as a byte count for `AsciiString` and as a count of 2-byte
/// units for `WideString`. Numeric, boolean and gap fields ignore it.
pub length: u32,
/// Field label as read from the header; may be empty.
pub label: String,
}
/// Self-describing header for GenericRecord streams.
///
/// The header is an ordered list of field descriptors; records are decoded
/// by walking that list in order.
#[derive(Debug)]
pub struct GenericDataHeader {
/// Field descriptors in on-disk order.
pub fields: Vec<GenericDataDescriptor>,
}
/// A typed value from a generic record.
///
/// Each variant corresponds to one or more `GenericType` codes as decoded by
/// `GenericRecord::read`.
#[derive(Debug, Clone)]
pub enum GenericValue {
/// Placeholder produced for a `Gap` field; no bytes are read for it.
Gap,
/// Signed 8-bit integer.
Int8(i8),
/// Boolean decoded from a single byte (non-zero is `true`). Used for
/// `Bool`, `BoolYesNo` and `BoolOnOff` fields alike.
Bool(bool),
/// Unsigned 8-bit integer.
UInt8(u8),
/// Signed 16-bit integer.
Int16(i16),
/// Unsigned 16-bit integer.
UInt16(u16),
/// Signed 32-bit integer.
Int32(i32),
/// Unsigned 32-bit integer.
UInt32(u32),
/// 32-bit floating point number.
Float32(f32),
/// 64-bit floating point number.
Float64(f64),
/// Text decoded from either an `AsciiString` or a `WideString` field.
String(String),
}
/// A single record decoded using a GenericDataHeader.
///
/// Values are kept in header order, so duplicate labels are preserved and
/// label lookups return the first match.
#[derive(Debug)]
pub struct GenericRecord {
/// `(label, value)` pairs, one per header field, in header order.
pub values: Vec<(String, GenericValue)>,
}
impl GenericDataHeader {
/// Try to read a GenericDataHeader. Returns `None` if the data at the
/// current position does not look like a valid header (e.g. unreasonable
/// field count or invalid type codes). The reader position is restored
/// on failure.
pub(crate) fn try_read<R: Read + Seek>(r: &mut BinaryReader<R>) -> Result<Option<Self>> {
let saved_pos = r.position();
let n = r.read_u32()?;
// A genuine schema has at least a couple of fields and fewer than ~500.
// The error-log "gap" region that precedes the schema in v64+ can
// otherwise mislead us by looking like a 0- or 1-field header.
if !(2..=500).contains(&n) {
r.seek_to(saved_pos)?;
return Ok(None);
}
let mut fields = Vec::with_capacity(n as usize);
for _ in 0..n {
let type_code = r.read_u32()?;
match GenericType::from_u32(type_code) {
Some(field_type) => {
let length = r.read_u32()?;
// Character count of the label. Real Thermo labels are
// short and printable; require it to look sane or the
// whole header is bogus.
let label_start = r.position();
let char_count = r.read_u32()?;
if char_count > 200 {
r.seek_to(saved_pos)?;
return Ok(None);
}
r.seek_to(label_start)?;
let label = match r.read_pascal_string() {
Ok(s) => s,
Err(crate::error::Error::InvalidUtf16(_)) => {
r.seek_to(saved_pos)?;
return Ok(None);
}
Err(e) => return Err(e),
};
if !label_is_plausible(&label) {
r.seek_to(saved_pos)?;
return Ok(None);
}
fields.push(GenericDataDescriptor {
field_type,
length,
label,
});
}
None => {
r.seek_to(saved_pos)?;
return Ok(None);
}
}
}
let hdr = Self { fields };
if !hdr.looks_meaningful() {
r.seek_to(saved_pos)?;
return Ok(None);
}
Ok(Some(hdr))
}
/// A schema is "meaningful" if it contains at least a few fields with
/// real labels and has a non-trivial fixed record size. Used to reject
/// false positives picked up by the forward scan.
fn looks_meaningful(&self) -> bool {
let named = self.fields.iter().filter(|f| !f.label.is_empty()).count();
named >= 2 && self.fixed_record_size() > 0
}
/// Sum of fixed byte sizes contributed by each descriptor. For variable
/// types (String/WideString) the descriptor's `length` field is used as
/// the storage allocation - which is the fixed on-disk size per record.
pub(crate) fn fixed_record_size(&self) -> usize {
self.fields
.iter()
.map(|f| match f.field_type {
GenericType::Gap => 0,
GenericType::Int8
| GenericType::Bool
| GenericType::BoolYesNo
| GenericType::BoolOnOff
| GenericType::UInt8 => 1,
GenericType::Int16 | GenericType::UInt16 => 2,
GenericType::Int32 | GenericType::UInt32 | GenericType::Float32 => 4,
GenericType::Float64 => 8,
GenericType::AsciiString => f.length as usize,
GenericType::WideString => f.length as usize * 2,
})
.sum()
}
/// Scan forward from the current position for a plausible GenericDataHeader
/// in a bounded window. The v64+ error-log region contains padding bytes
/// before the scan-parameters schema whose size isn't easily computed, so
/// we locate the schema by scanning for a valid signature.
pub(crate) fn find_forward<R: Read + Seek>(
r: &mut BinaryReader<R>,
max_scan: u64,
expected_record_size: Option<usize>,
) -> Result<Option<Self>> {
let start = r.position();
let cap = max_scan.min(4 * 1024 * 1024) as usize;
r.seek_to(start)?;
let buf = r.read_bytes(cap)?;
// Two passes: first require the schema's fixed record size to match
// the tail; second accept any meaningful schema.
//
// Parse candidates entirely from the in-memory buffer using a Cursor so
// that we never seek the underlying file reader for each false positive.
// This avoids O(n) file seeks when the error-log gap is large (>1 MB).
for pass in 0..2 {
let mut offset = 0usize;
while offset + 4 <= buf.len() {
let n = u32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap());
if (2..=500).contains(&n) {
let mut cursor = BinaryReader::new(Cursor::new(&buf[offset..]));
if let Some(hdr) = Self::try_read(&mut cursor)? {
let size_ok = match (pass, expected_record_size) {
(0, Some(want)) => hdr.fixed_record_size() == want,
_ => true,
};
if size_ok {
return Ok(Some(hdr));
}
}
}
offset += 2;
}
if expected_record_size.is_none() {
break;
}
}
r.seek_to(start)?;
Ok(None)
}
}
/// Heuristic sanity check for a GDH field label.
///
/// A label passes when it is empty or no longer than 200 bytes of UTF-8.
/// Printability is deliberately not required, because some labels are short
/// single-character sentinels.
fn label_is_plausible(s: &str) -> bool {
    /// Upper bound on the encoded (UTF-8) length of an acceptable label.
    const MAX_LABEL_BYTES: usize = 200;
    s.len() <= MAX_LABEL_BYTES
}
impl GenericRecord {
pub(crate) fn read<R: Read + Seek>(
r: &mut BinaryReader<R>,
header: &GenericDataHeader,
) -> Result<Self> {
let mut values = Vec::with_capacity(header.fields.len());
for desc in &header.fields {
let label = desc.label.clone();
let value = match desc.field_type {
GenericType::Gap => GenericValue::Gap,
GenericType::Int8 => GenericValue::Int8(r.read_i8()?),
GenericType::Bool | GenericType::BoolYesNo | GenericType::BoolOnOff => {
GenericValue::Bool(r.read_u8()? != 0)
}
GenericType::UInt8 => GenericValue::UInt8(r.read_u8()?),
GenericType::Int16 => GenericValue::Int16(r.read_i16()?),
GenericType::UInt16 => GenericValue::UInt16(r.read_u16()?),
GenericType::Int32 => GenericValue::Int32(r.read_i32()?),
GenericType::UInt32 => GenericValue::UInt32(r.read_u32()?),
GenericType::Float32 => GenericValue::Float32(r.read_f32()?),
GenericType::Float64 => GenericValue::Float64(r.read_f64()?),
GenericType::AsciiString => {
let s = if desc.length > 0 {
let bytes = r.read_bytes(desc.length as usize)?;
let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
String::from_utf8_lossy(&bytes[..end]).into_owned()
} else {
String::new()
};
GenericValue::String(s)
}
GenericType::WideString => {
let s = if desc.length > 0 {
r.read_utf16_fixed(desc.length as usize * 2)?
} else {
String::new()
};
GenericValue::String(s)
}
};
values.push((label, value));
}
Ok(Self { values })
}
/// Look up a field by label and return a reference to its value.
pub fn get(&self, label: &str) -> Option<&GenericValue> {
self.values.iter().find(|(l, _)| l == label).map(|(_, v)| v)
}
/// Get a float64 field by label.
pub fn get_f64(&self, label: &str) -> Option<f64> {
match self.get(label)? {
GenericValue::Float64(v) => Some(*v),
GenericValue::Float32(v) => Some(*v as f64),
_ => None,
}
}
/// Get a float32 field by label.
pub fn get_f32(&self, label: &str) -> Option<f32> {
match self.get(label)? {
GenericValue::Float32(v) => Some(*v),
GenericValue::Float64(v) => Some(*v as f32),
_ => None,
}
}
/// Get an i32 field by label.
pub fn get_i32(&self, label: &str) -> Option<i32> {
match self.get(label)? {
GenericValue::Int32(v) => Some(*v),
GenericValue::Int16(v) => Some(*v as i32),
GenericValue::Int8(v) => Some(*v as i32),
_ => None,
}
}
/// Get a string field by label.
pub fn get_string(&self, label: &str) -> Option<&str> {
match self.get(label)? {
GenericValue::String(v) => Some(v.as_str()),
_ => None,
}
}
}
impl GenericValue {
    /// Get as f64, converting numeric types.
    ///
    /// Every integer and floating-point variant is widened to `f64`.
    /// `Gap`, `Bool` and `String` are not numeric and yield `None`.
    pub fn as_f64(&self) -> Option<f64> {
        let widened = match *self {
            Self::Float64(x) => x,
            Self::Float32(x) => f64::from(x),
            Self::Int8(x) => f64::from(x),
            Self::UInt8(x) => f64::from(x),
            Self::Int16(x) => f64::from(x),
            Self::UInt16(x) => f64::from(x),
            Self::Int32(x) => f64::from(x),
            Self::UInt32(x) => f64::from(x),
            Self::Gap | Self::Bool(_) | Self::String(_) => return None,
        };
        Some(widened)
    }
}