1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
//! DWG Merged Reader — R2007+ three-stream demultiplexer
//!
//! In DWG R2007+, each object/section record is encoded as three
//! interleaved streams:
//!
//! ```text
//! |---main---|---text---|flag|---handles---|
//! ```
//!
//! `DwgMergedReader` transparently routes reads to the correct sub-reader:
//! - Data reads → main reader
//! - `read_variable_text()` → text reader
//! - `read_handle()` → handle reader
//!
//! For pre-R2007, all reads go to the main reader (two-stream mode where
//! text is inline in the main stream).
//!
//! Based on ACadSharp's `DwgMergedReader`.
use crate::io::dwg::dwg_stream_readers::bit_reader::DwgBitReader;
use crate::io::dwg::dwg_version::DwgVersion;
use crate::types::{Color, DxfVersion, Vector2, Vector3};
/// Merge mode, determined by DWG version.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MergeMode {
/// R13–R2004: Two-stream (main + handle). Text is inline in main.
TwoStream,
/// R2007+: Three-stream (main + text + handle).
ThreeStream,
}
/// Merged reader that transparently demultiplexes three-stream R2007+ data.
///
/// For pre-R2007, this acts as a simple passthrough to the main reader.
pub struct DwgMergedReader {
/// Main data reader
main: DwgBitReader,
/// Text reader (R2007+ only; for pre-R2007, text reads from main)
text: Option<DwgBitReader>,
/// Handle reader (split from main after handle_start_bits)
handle: Option<DwgBitReader>,
/// Merge mode
_mode: MergeMode,
/// DXF version
dxf_version: DxfVersion,
/// Raw data (kept for lazy text/handle setup in ThreeStream mode)
raw_data: Option<Vec<u8>>,
/// Handle-stream bit count from the R2010+ MC framing field.
/// Stored so unknown entities can reproduce the correct framing on write.
handle_bits: i64,
/// Reference handle for offset-based handle codes (6/8/A/C).
///
/// In DWG, handle references with codes 6, 8, 0xA, 0xC are relative to
/// the current object's own handle. This field should be set via
/// `set_ref_handle()` right after reading the object's handle from the
/// main stream.
ref_handle: u64,
/// Bit position where the handle stream starts.
/// For R2007: equals the RL field (total_size_bits).
/// For R2010+: equals total_data_bits - handle_bits.
/// For pre-R2007: equals handle_start_bits from the constructor.
handle_start_bit: i64,
}
impl DwgMergedReader {
/// Create a merged reader from raw section data.
///
/// For R2007+, this splits the data into three sub-readers based on
/// the embedded stream boundaries.
///
/// For pre-R2007, the data is split into main (up to handle_start_bits)
/// and handle (from handle_start_bits onward).
///
/// # Arguments
/// * `data` - Raw section data (the merged stream bytes)
/// * `dxf_version` - DXF version for version-specific parsing
/// * `handle_start_bits` - Bit position where handle data begins
/// (only used for two-stream mode; for three-stream, computed from flags)
pub fn new(
data: Vec<u8>,
dxf_version: DxfVersion,
handle_start_bits: i64,
) -> Self {
let dwg = DwgVersion::from_dxf_version(dxf_version)
.unwrap_or(DwgVersion::AC15);
let mode = if dxf_version >= DxfVersion::AC1021 {
MergeMode::ThreeStream
} else {
MergeMode::TwoStream
};
match mode {
MergeMode::TwoStream => {
// Two-stream: main = data[:handle_start], handle = data[handle_start:]
let main = DwgBitReader::new(data.clone(), dwg, dxf_version);
// Create handle reader from remaining bytes
let handle_start_byte = (handle_start_bits / 8) as usize;
let handle_data = if handle_start_byte < data.len() {
data[handle_start_byte..].to_vec()
} else {
Vec::new()
};
let handle = DwgBitReader::new(handle_data, dwg, dxf_version);
DwgMergedReader {
main,
text: None,
handle: Some(handle),
_mode: mode,
dxf_version,
raw_data: None,
handle_bits: 0,
ref_handle: 0,
handle_start_bit: handle_start_bits,
}
}
MergeMode::ThreeStream => {
// Three-stream: lazy setup.
// Don't read BL or set up text/handle readers here.
// The BL is not at position 0 — it comes after the type code.
// Text and handle readers will be set up later via
// setup_text_and_handle() after the caller reads the BL.
let main_reader = DwgBitReader::new(data.clone(), dwg, dxf_version);
DwgMergedReader {
main: main_reader,
text: None,
handle: None,
_mode: mode,
dxf_version,
raw_data: Some(data),
handle_bits: 0,
ref_handle: 0,
handle_start_bit: 0, // set later when RL is known
}
}
}
}
/// Create a merged reader from separate pre-split streams.
///
/// Used when the caller has already separated the three streams
/// (e.g., after decompression of individual section pages).
pub fn from_readers(
main: DwgBitReader,
text: Option<DwgBitReader>,
handle: Option<DwgBitReader>,
dxf_version: DxfVersion,
) -> Self {
let mode = if text.is_some() {
MergeMode::ThreeStream
} else {
MergeMode::TwoStream
};
DwgMergedReader {
main,
text,
handle,
_mode: mode,
dxf_version,
raw_data: None,
handle_bits: 0,
ref_handle: 0,
handle_start_bit: 0,
}
}
/// Set up text and handle readers for ThreeStream mode.
///
/// Called after the caller reads the RL (total_size_bits) from the main stream.
/// The RL is written by `save_position_for_size` in the writer.
/// It stores **one past** the text-present flag bit position (same convention
/// as per-object records): flag is at RL − 1, handles start at the next
/// byte boundary after RL.
///
/// Layout: `[RL][main_data...][text...][modular_short][flag@RL-1][pad][handles...]`
pub fn setup_text_and_handle(&mut self, total_size_bits: i64) {
if let Some(ref data) = self.raw_data {
let dwg = DwgVersion::from_dxf_version(self.dxf_version)
.unwrap_or(DwgVersion::AC15);
// Text reader — the flag bit is at RL − 1 (per-object convention,
// matching the classes reader and object reader).
let mut text_reader = DwgBitReader::new(data.clone(), dwg, self.dxf_version);
text_reader.set_position_by_flag(total_size_bits - 1);
self.text = Some(text_reader);
// Handle reader — starts at the next byte boundary after the flag bit.
let handle_start = ((total_size_bits + 1 + 7) / 8) * 8;
let mut handle_reader = DwgBitReader::new(data.clone(), dwg, self.dxf_version);
handle_reader.set_position_in_bits(handle_start);
self.handle = Some(handle_reader);
}
}
/// Return a clone of the full merged-stream record bytes.
///
/// The bytes represent the complete payload between the
/// ModularShort length prefix and the CRC-16 trailer.
/// Used to preserve raw data for unknown entity round-trips.
pub fn raw_merged_data(&self) -> Vec<u8> {
self.main.data_bytes()
}
/// Set the handle-bits value (from R2010+ MC framing).
pub fn set_handle_bits(&mut self, bits: i64) {
self.handle_bits = bits;
}
/// Get the handle-bits value stored by the reader.
pub fn get_handle_bits(&self) -> i64 {
self.handle_bits
}
// ════════════════════════════════════════════════════════════════════════
// Data reads — always from main reader
// ════════════════════════════════════════════════════════════════════════
pub fn read_bit(&mut self) -> bool { self.main.read_bit() }
pub fn read_byte(&mut self) -> u8 { self.main.read_byte() }
pub fn read_bytes(&mut self, length: usize) -> Vec<u8> { self.main.read_bytes(length) }
pub fn read_bit_short(&mut self) -> i16 { self.main.read_bit_short() }
pub fn read_bit_long(&mut self) -> i32 { self.main.read_bit_long() }
pub fn read_bit_long_long(&mut self) -> i64 { self.main.read_bit_long_long() }
pub fn read_bit_double(&mut self) -> f64 { self.main.read_bit_double() }
pub fn read_raw_long(&mut self) -> i64 { self.main.read_raw_long() }
pub fn read_raw_short(&mut self) -> i16 { self.main.read_raw_short() }
pub fn read_raw_double(&mut self) -> f64 { self.main.read_raw_double() }
pub fn read_2bit_double(&mut self) -> Vector2 { self.main.read_2bit_double() }
pub fn read_3bit_double(&mut self) -> Vector3 { self.main.read_3bit_double() }
pub fn read_2raw_double(&mut self) -> Vector2 { self.main.read_2raw_double() }
pub fn read_bit_extrusion(&mut self) -> Vector3 { self.main.read_bit_extrusion() }
pub fn read_bit_thickness(&mut self) -> f64 { self.main.read_bit_thickness() }
pub fn read_bit_double_with_default(&mut self, default: f64) -> f64 {
self.main.read_bit_double_with_default(default)
}
pub fn read_cm_color(&mut self) -> Color { self.main.read_cm_color() }
pub fn read_en_color(&mut self) -> (Color, crate::types::Transparency, bool) {
self.main.read_en_color()
}
pub fn read_color_by_index(&mut self) -> Color { self.main.read_color_by_index() }
pub fn read_modular_char(&mut self) -> u64 { self.main.read_modular_char() }
pub fn read_signed_modular_char(&mut self) -> i64 { self.main.read_signed_modular_char() }
pub fn read_modular_short(&mut self) -> i32 { self.main.read_modular_short() }
pub fn read_object_type(&mut self) -> i16 { self.main.read_object_type() }
// ════════════════════════════════════════════════════════════════════════
// Text reads — from text reader for R2007+, main for pre-R2007
// ════════════════════════════════════════════════════════════════════════
/// Read a variable-length text string.
///
/// For R2007+, this reads from the separate text stream (UTF-16LE).
/// For pre-R2007, this reads from the main stream.
pub fn read_variable_text(&mut self) -> String {
match &mut self.text {
Some(text_reader) => text_reader.read_variable_text(),
None => self.main.read_variable_text(),
}
}
/// Read a text string, but always from the main stream.
///
/// Used for fields that are always inline even in R2007+.
pub fn read_text_inline(&mut self) -> String {
self.main.read_variable_text()
}
// ════════════════════════════════════════════════════════════════════════
// Handle reads — from handle reader if available, else main
// ════════════════════════════════════════════════════════════════════════
/// Reposition the handle reader to a new bit position.
///
/// Used for R13/R14 where the handle-stream split point (RL) is
/// discovered inside the entity preamble rather than at the top of
/// the record.
pub fn reposition_handle_reader(&mut self, bit_position: i64) {
if let Some(ref mut handle_reader) = self.handle {
handle_reader.set_position_in_bits(bit_position);
}
}
/// Read a handle reference.
///
/// For R2007+, this reads from the separate handle stream.
/// For pre-R2007, this reads from the main stream.
///
/// Offset-type codes (6/8/A/C) are resolved relative to `ref_handle`,
/// which should be set to the current object's handle via
/// `set_ref_handle()` after reading the object preface.
pub fn read_handle(&mut self) -> u64 {
match &mut self.handle {
Some(handle_reader) => handle_reader.read_handle_relative(self.ref_handle),
None => self.main.read_handle_relative(self.ref_handle),
}
}
/// Set the reference handle for offset-based handle codes.
///
/// Must be called after reading the current object's own handle
/// from the main stream (via `read_common_data`).
pub fn set_ref_handle(&mut self, handle: u64) {
self.ref_handle = handle;
}
/// Read a handle reference relative to a base handle.
pub fn read_handle_reference(&mut self, ref_handle: u64) -> (u64, crate::io::dwg::dwg_reference_type::DwgReferenceType) {
match &mut self.handle {
Some(handle_reader) => {
let mut ref_type = crate::io::dwg::dwg_reference_type::DwgReferenceType::Undefined;
let h = handle_reader.read_handle_reference(ref_handle, &mut ref_type);
(h, ref_type)
}
None => {
let mut ref_type = crate::io::dwg::dwg_reference_type::DwgReferenceType::Undefined;
let h = self.main.read_handle_reference(ref_handle, &mut ref_type);
(h, ref_type)
}
}
}
// ════════════════════════════════════════════════════════════════════════
// Position and state queries
// ════════════════════════════════════════════════════════════════════════
/// Main reader bit position.
pub fn position_in_bits(&self) -> i64 { self.main.position_in_bits() }
/// Main reader byte position.
pub fn position(&self) -> usize { self.main.position() }
/// Set main reader position.
pub fn set_position_in_bits(&mut self, pos: i64) { self.main.set_position_in_bits(pos); }
/// Get the DXF version.
pub fn dxf_version(&self) -> DxfVersion { self.dxf_version }
/// Get a mutable reference to the main reader (for direct access).
pub fn main_mut(&mut self) -> &mut DwgBitReader { &mut self.main }
/// Get a reference to the main reader.
pub fn main(&self) -> &DwgBitReader { &self.main }
/// Get the bit position where the handle stream starts.
/// For R2007: this equals the RL (total_size_bits) field.
/// Returns 0 if not set (pre-R2007 three-stream or no handle reader).
pub fn handle_start(&self) -> i64 { self.handle_start_bit }
/// Set the bit position where the handle stream starts.
pub fn set_handle_start(&mut self, bit: i64) { self.handle_start_bit = bit; }
}
// ════════════════════════════════════════════════════════════════════════════
// Tests
// ════════════════════════════════════════════════════════════════════════════
#[cfg(test)]
mod tests {
use super::*;
use crate::io::dwg::dwg_stream_writers::bit_writer::DwgBitWriter;
#[test]
fn test_two_stream_from_readers() {
// Create main stream with data + text
let dwg = DwgVersion::AC15;
let version = DxfVersion::AC1015;
let mut main_writer = DwgBitWriter::new(dwg, version);
main_writer.write_bit_short(42);
main_writer.write_bit_double(3.14);
main_writer.write_variable_text("hello");
// Create handle stream
let mut handle_writer = DwgBitWriter::new(dwg, version);
handle_writer.write_handle_undefined(0x1A);
let main_data = main_writer.to_bytes();
let handle_data = handle_writer.to_bytes();
let main = DwgBitReader::new(main_data, dwg, version);
let handle = DwgBitReader::new(handle_data, dwg, version);
let mut reader = DwgMergedReader::from_readers(main, None, Some(handle), version);
assert_eq!(reader.read_bit_short(), 42);
assert!((reader.read_bit_double() - 3.14).abs() < 1e-10);
assert_eq!(reader.read_variable_text(), "hello");
let h = reader.read_handle();
assert_eq!(h, 0x1A);
}
#[test]
fn test_from_readers_passthrough() {
let dwg = DwgVersion::AC15;
let version = DxfVersion::AC1015;
let mut writer = DwgBitWriter::new(dwg, version);
writer.write_bit_short(99);
writer.write_bit_double(2.71);
let data = writer.to_bytes();
let main = DwgBitReader::new(data, dwg, version);
let mut reader = DwgMergedReader::from_readers(main, None, None, version);
assert_eq!(reader.read_bit_short(), 99);
assert!((reader.read_bit_double() - 2.71).abs() < 1e-10);
}
#[test]
fn test_three_stream_text_routing() {
// Verify that text reads go to the text reader when one is provided
let dwg = DwgVersion::AC21;
let version = DxfVersion::AC1021;
// Main stream: numeric data
let mut main_writer = DwgBitWriter::new(dwg, version);
main_writer.write_bit_short(77);
main_writer.write_bit_double(1.5);
let main_data = main_writer.to_bytes();
// Text stream: also numeric data, but routed separately
// We write text using the TU format (write_text_unicode)
let mut text_writer = DwgBitWriter::new(dwg, version);
text_writer.write_variable_text("world");
let text_data = text_writer.to_bytes();
// Handle stream
let mut handle_writer = DwgBitWriter::new(dwg, version);
handle_writer.write_handle_undefined(0x42);
let handle_data = handle_writer.to_bytes();
let main = DwgBitReader::new(main_data, dwg, version);
let text = DwgBitReader::new(text_data, dwg, version);
let handle = DwgBitReader::new(handle_data, dwg, version);
let mut reader = DwgMergedReader::from_readers(main, Some(text), Some(handle), version);
// Data from main
assert_eq!(reader.read_bit_short(), 77);
assert!((reader.read_bit_double() - 1.5).abs() < 1e-10);
// Text from separate text reader (uses read_text_unicode internally for R2007+)
let t = reader.read_variable_text();
assert_eq!(t, "world");
// Handle from separate handle reader
let h = reader.read_handle();
assert_eq!(h, 0x42);
}
}