Skip to main content

oracledb_protocol/thin/
dbobject.rs

1#![forbid(unsafe_code)]
2
3use super::*;
4
/// Cursor over the packed byte image of a DbObject.
///
/// Borrows the image for `'a` and remembers how far decoding has progressed.
/// `Debug` and `Clone` are derived so a reader can be logged in diagnostics
/// and cheaply snapshotted (it only holds a borrowed slice and an offset).
#[derive(Debug, Clone)]
pub struct DbObjectPackedReader<'a> {
    /// The complete packed image being decoded.
    bytes: &'a [u8],
    /// Offset of the next unread byte within `bytes`.
    pos: usize,
}
9
10impl<'a> DbObjectPackedReader<'a> {
11    pub fn new(bytes: &'a [u8]) -> Self {
12        Self { bytes, pos: 0 }
13    }
14
15    pub fn read_u8(&mut self) -> Result<u8> {
16        let value = self
17            .bytes
18            .get(self.pos)
19            .copied()
20            .ok_or(ProtocolError::TtcDecode("truncated DbObject packed data"))?;
21        self.pos += 1;
22        Ok(value)
23    }
24
25    fn read_raw(&mut self, len: usize) -> Result<&'a [u8]> {
26        let end = self.pos.checked_add(len).ok_or(ProtocolError::TtcDecode(
27            "DbObject packed data offset overflow",
28        ))?;
29        let bytes = self
30            .bytes
31            .get(self.pos..end)
32            .ok_or(ProtocolError::TtcDecode("truncated DbObject packed data"))?;
33        self.pos = end;
34        Ok(bytes)
35    }
36
37    fn skip(&mut self, len: usize) -> Result<()> {
38        self.read_raw(len).map(|_| ())
39    }
40
41    fn read_u32be(&mut self) -> Result<u32> {
42        let bytes = self.read_raw(4)?;
43        Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
44            ProtocolError::TtcDecode("invalid DbObject u32")
45        })?))
46    }
47
48    pub fn read_i32be(&mut self) -> Result<i32> {
49        let bytes = self.read_raw(4)?;
50        Ok(i32::from_be_bytes(bytes.try_into().map_err(|_| {
51            ProtocolError::TtcDecode("invalid DbObject i32")
52        })?))
53    }
54
55    pub fn read_length(&mut self) -> Result<usize> {
56        match self.read_u8()? {
57            TNS_LONG_LENGTH_INDICATOR => usize::try_from(self.read_u32be()?)
58                .map_err(|_| ProtocolError::TtcDecode("DbObject length overflow")),
59            length => Ok(usize::from(length)),
60        }
61    }
62
63    fn skip_length(&mut self) -> Result<()> {
64        if self.read_u8()? == TNS_LONG_LENGTH_INDICATOR {
65            self.skip(4)?;
66        }
67        Ok(())
68    }
69
70    pub fn read_value_bytes(&mut self) -> Result<Option<Vec<u8>>> {
71        let length = match self.read_u8()? {
72            0 | TNS_NULL_LENGTH_INDICATOR => return Ok(None),
73            TNS_LONG_LENGTH_INDICATOR => usize::try_from(self.read_u32be()?)
74                .map_err(|_| ProtocolError::TtcDecode("DbObject value length overflow"))?,
75            length => usize::from(length),
76        };
77        Ok(Some(self.read_raw(length)?.to_vec()))
78    }
79
80    pub fn read_header(&mut self) -> Result<()> {
81        let flags = self.read_u8()?;
82        let _version = self.read_u8()?;
83        self.skip_length()?;
84        if flags & TNS_OBJ_IS_DEGENERATE != 0 {
85            return Err(ProtocolError::UnsupportedFeature(
86                "DbObject stored in a LOB",
87            ));
88        }
89        if flags & TNS_OBJ_NO_PREFIX_SEG == 0 {
90            let prefix_len = self.read_length()?;
91            self.skip(prefix_len)?;
92        }
93        Ok(())
94    }
95
96    fn bytes_left(&self) -> usize {
97        self.bytes.len().saturating_sub(self.pos)
98    }
99
100    /// Bytes still unread in the packed image. Exposed so a caller materializing
101    /// a collection (whose element count is a server-declared `read_length`) can
102    /// bound its `Vec` pre-allocation against the buffer via the
103    /// [`BoundedReader`](crate::wire::BoundedReader) trait — closing the
104    /// OOM-from-length class for DbObject collections too.
105    pub fn remaining(&self) -> usize {
106        self.bytes_left()
107    }
108
109    pub fn read_atomic_null(&mut self, is_collection_context: bool) -> Result<bool> {
110        let value = self.read_u8()?;
111        match (value, is_collection_context) {
112            (TNS_OBJ_ATOMIC_NULL, _) | (TNS_NULL_LENGTH_INDICATOR, true) => Ok(true),
113            _ => {
114                self.pos = self.pos.saturating_sub(1);
115                Ok(false)
116            }
117        }
118    }
119}
120
121impl crate::wire::BoundedReader for DbObjectPackedReader<'_> {
122    fn remaining(&self) -> usize {
123        self.bytes_left()
124    }
125}
126
127/// Writes a length-prefixed value into a DbObject pickle image buffer using the
128/// inner-buffer scheme (252 short cutoff, 32767 chunks for the long form). This
129/// mirrors `Buffer.write_bytes_with_length` used by `_pack_value`
130/// (reference impl/thin/packet.pyx) — NOT the 245-cutoff `write_length`.
131pub fn image_write_value_bytes(buf: &mut Vec<u8>, value: &[u8]) -> Result<()> {
132    if value.len() <= crate::wire::TNS_MAX_SHORT_LENGTH {
133        buf.push(value.len() as u8);
134        buf.extend_from_slice(value);
135        return Ok(());
136    }
137    buf.push(TNS_LONG_LENGTH_INDICATOR);
138    for chunk in value.chunks(32_767) {
139        image_write_ub4(
140            buf,
141            u32::try_from(chunk.len()).map_err(|_| ProtocolError::InvalidPacketLength {
142                length: chunk.len(),
143                minimum: 0,
144            })?,
145        );
146        buf.extend_from_slice(chunk);
147    }
148    image_write_ub4(buf, 0);
149    Ok(())
150}
151
/// Writes a `ub4` into a pickle image buffer (reference `write_ub4`).
///
/// The encoding is a width byte (0, 1, 2 or 4) followed by that many
/// big-endian bytes of `value`; zero is therefore the single byte `0`.
pub(crate) fn image_write_ub4(buf: &mut Vec<u8>, value: u32) {
    let width: usize = match value {
        0 => 0,
        0x1..=0xFF => 1,
        0x100..=0xFFFF => 2,
        _ => 4,
    };
    let big_endian = value.to_be_bytes();
    buf.push(width as u8);
    // Keep only the low-order `width` bytes of the big-endian form.
    buf.extend_from_slice(&big_endian[big_endian.len() - width..]);
}
167
168/// Writes a collection/element count length into a pickle image buffer using
169/// the 245-cutoff scheme (reference `DbObjectPickleBuffer.write_length`).
170pub fn image_write_length(buf: &mut Vec<u8>, length: usize) -> Result<()> {
171    if length <= TNS_OBJ_MAX_SHORT_LENGTH {
172        buf.push(length as u8);
173    } else {
174        buf.push(TNS_LONG_LENGTH_INDICATOR);
175        buf.extend_from_slice(
176            &u32::try_from(length)
177                .map_err(|_| ProtocolError::InvalidPacketLength { length, minimum: 0 })?
178                .to_be_bytes(),
179        );
180    }
181    Ok(())
182}
183
184/// Builds the pickle image header (reference `write_header` + image_flags from
185/// `create_new_object`). Returns the buffer pre-seeded with the header; the
186/// caller appends the body and then calls [`image_finalize`] to back-patch the
187/// total size (4-byte BE at offset 3).
188pub fn image_begin(is_collection: bool) -> Vec<u8> {
189    let mut image_flags = TNS_OBJ_IS_VERSION_81;
190    if is_collection {
191        image_flags |= TNS_OBJ_IS_COLLECTION;
192    } else {
193        image_flags |= TNS_OBJ_NO_PREFIX_SEG;
194    }
195    let mut buf = Vec::new();
196    buf.push(image_flags);
197    buf.push(TNS_OBJ_IMAGE_VERSION);
198    buf.push(TNS_LONG_LENGTH_INDICATOR);
199    buf.extend_from_slice(&0u32.to_be_bytes()); // size placeholder (offset 3)
200    if is_collection {
201        buf.push(1); // length of prefix segment
202        buf.push(1); // prefix segment contents
203    }
204    buf
205}
206
207/// Back-patches the total image size (reference `_get_packed_data`: the 4-byte
208/// BE size at offset 3, after flags + version + 0xFE).
209pub fn image_finalize(buf: &mut [u8]) -> Result<()> {
210    let size = u32::try_from(buf.len()).map_err(|_| ProtocolError::InvalidPacketLength {
211        length: buf.len(),
212        minimum: 0,
213    })?;
214    let slot = buf.get_mut(3..7).ok_or(ProtocolError::TtcDecode(
215        "DbObject image too short to finalize",
216    ))?;
217    slot.copy_from_slice(&size.to_be_bytes());
218    Ok(())
219}
220
221/// Collection flags byte written at the start of a collection body
222/// (`TNS_OBJ_HAS_INDEXES` for associative arrays, else 0). Reference
223/// `_parse_tds` collection_flags + `_pack_data`.
224pub fn collection_flags_for(is_assoc_array: bool) -> u8 {
225    if is_assoc_array {
226        TNS_OBJ_HAS_INDEXES
227    } else {
228        0
229    }
230}
231
232/// Writes a NULL element/attribute marker into the image. Non-collection object
233/// attributes use `TNS_OBJ_ATOMIC_NULL` (253); scalars and collection elements
234/// use `TNS_NULL_LENGTH_INDICATOR` (255). Reference `_pack_value` None branch.
235pub fn image_write_null(buf: &mut Vec<u8>, atomic_null: bool) {
236    if atomic_null {
237        buf.push(TNS_OBJ_ATOMIC_NULL);
238    } else {
239        buf.push(TNS_NULL_LENGTH_INDICATOR);
240    }
241}
242
243/// Packs a single scalar `BindValue` into a DbObject pickle image buffer,
244/// mirroring `_pack_value` (reference impl/thin/dbobject.pyx:247-306). Object
245/// (nested) and Null/Array values are handled by the caller (the pyshim owns
246/// the recursion and null framing); this serves scalar attributes and
247/// collection elements only.
248pub fn pack_bindvalue_into_image(buf: &mut Vec<u8>, value: &BindValue, csfrm: u8) -> Result<()> {
249    match value {
250        BindValue::Text(text) => {
251            let bytes = encode_text_value(text, csfrm);
252            image_write_value_bytes(buf, &bytes)
253        }
254        BindValue::Raw(bytes) => image_write_value_bytes(buf, bytes),
255        BindValue::Number(text) => {
256            let bytes = encode_number_text(text)?;
257            image_write_value_bytes(buf, &bytes)
258        }
259        // PLS_INTEGER / BINARY_INTEGER pack as uint8(4) + uint32be (NOT Oracle
260        // number text) inside an object image.
261        BindValue::BinaryInteger(text) => {
262            let value = parse_binary_integer_u32(text)?;
263            buf.push(4);
264            buf.extend_from_slice(&value.to_be_bytes());
265            Ok(())
266        }
267        // BOOLEAN inside an image is the 4-byte form, NOT [1,1]/[0].
268        BindValue::Boolean(value) => {
269            buf.push(4);
270            buf.extend_from_slice(&u32::from(*value).to_be_bytes());
271            Ok(())
272        }
273        BindValue::BinaryDouble(value) => {
274            let bytes = encode_binary_double(*value);
275            image_write_value_bytes(buf, &bytes)
276        }
277        BindValue::BinaryFloat(value) => {
278            let bytes = encode_binary_float(*value as f32);
279            image_write_value_bytes(buf, &bytes)
280        }
281        BindValue::DateTime {
282            year,
283            month,
284            day,
285            hour,
286            minute,
287            second,
288        } => {
289            let bytes = encode_oracle_date(*year, *month, *day, *hour, *minute, *second)?;
290            image_write_value_bytes(buf, &bytes)
291        }
292        BindValue::Timestamp {
293            year,
294            month,
295            day,
296            hour,
297            minute,
298            second,
299            nanosecond,
300            ora_type_num,
301        } => {
302            let bytes = if matches!(*ora_type_num, ORA_TYPE_NUM_TIMESTAMP_TZ) {
303                encode_oracle_timestamp_tz(
304                    *year,
305                    *month,
306                    *day,
307                    *hour,
308                    *minute,
309                    *second,
310                    *nanosecond,
311                )?
312            } else {
313                encode_oracle_timestamp(*year, *month, *day, *hour, *minute, *second, *nanosecond)?
314            };
315            image_write_value_bytes(buf, &bytes)
316        }
317        BindValue::Lob { locator, .. } => image_write_value_bytes(buf, locator),
318        BindValue::IntervalDS {
319            days,
320            seconds,
321            microseconds,
322        } => {
323            let bytes = encode_interval_ds(*days, *seconds, *microseconds)?;
324            image_write_value_bytes(buf, &bytes)
325        }
326        BindValue::IntervalYM { years, months } => {
327            let bytes = encode_interval_ym(*years, *months)?;
328            image_write_value_bytes(buf, &bytes)
329        }
330        BindValue::Null => {
331            image_write_null(buf, false);
332            Ok(())
333        }
334        _ => Err(ProtocolError::UnsupportedFeature(
335            "DbObject attribute type not supported for input binding",
336        )),
337    }
338}
339
340pub(crate) fn parse_binary_integer_u32(text: &str) -> Result<u32> {
341    let trimmed = text.trim();
342    let parsed: i64 = trimmed
343        .parse()
344        .map_err(|_| ProtocolError::TtcDecode("invalid BINARY_INTEGER value"))?;
345    Ok(parsed as u32)
346}
347
348/// Frames a fully-packed DbObject pickle `image` into the outgoing data row,
349/// replacing the zero stub used for empty OUT binds. Mirrors
350/// `WriteBuffer.write_dbobject` (reference impl/thin/packet.pyx:842-857). The
351/// `toid` is derived from the type `oid` per `create_new_object` (620-622).
352pub fn write_dbobject_bind(writer: &mut TtcWriter, oid: &[u8], image: &[u8]) -> Result<()> {
353    let mut toid = Vec::with_capacity(4 + oid.len() + TNS_EXTENT_OID.len());
354    toid.extend_from_slice(&[0x00, 0x22, TNS_OBJ_NON_NULL_OID, TNS_OBJ_HAS_EXTENT_OID]);
355    toid.extend_from_slice(oid);
356    toid.extend_from_slice(&TNS_EXTENT_OID);
357    writer.write_bytes_with_two_lengths(Some(&toid))?;
358    writer.write_bytes_with_two_lengths(Some(oid))?;
359    writer.write_ub4(0); // snapshot
360    writer.write_ub4(0); // version
361    writer.write_ub4(u32::try_from(image.len()).map_err(|_| {
362        ProtocolError::InvalidPacketLength {
363            length: image.len(),
364            minimum: 0,
365        }
366    })?);
367    writer.write_ub4(TNS_OBJ_TOP_LEVEL);
368    writer.write_bytes_with_length(image)
369}
370
371pub fn decode_dbobject_text(bytes: &[u8], dbtype_name: &str) -> Result<String> {
372    if matches!(dbtype_name, "DB_TYPE_NCHAR" | "DB_TYPE_NVARCHAR") {
373        let mut chunks = bytes.chunks_exact(2);
374        let units = chunks
375            .by_ref()
376            .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]]))
377            .collect::<Vec<_>>();
378        if !chunks.remainder().is_empty() {
379            return Err(ProtocolError::TtcDecode("invalid DbObject UTF-16 text"));
380        }
381        return String::from_utf16(&units)
382            .map_err(|_| ProtocolError::TtcDecode("invalid DbObject UTF-16 text"));
383    }
384    String::from_utf8(bytes.to_vec())
385        .map_err(|_| ProtocolError::TtcDecode("invalid DbObject UTF-8 text"))
386}
387
388pub fn decode_dbobject_xmltype_text(bytes: &[u8]) -> Result<Option<String>> {
389    let mut reader = DbObjectPackedReader::new(bytes);
390    reader.read_header()?;
391    reader.skip(1)?;
392    let xml_flag = reader.read_u32be()?;
393    if xml_flag & TNS_XML_TYPE_FLAG_SKIP_NEXT_4 != 0 {
394        reader.skip(4)?;
395    }
396    let bytes = reader.read_raw(reader.bytes_left())?;
397    if xml_flag & TNS_XML_TYPE_STRING != 0 {
398        return decode_dbobject_text(bytes, "DB_TYPE_VARCHAR").map(Some);
399    }
400    if xml_flag & TNS_XML_TYPE_LOB != 0 {
401        return Ok(None);
402    }
403    Err(ProtocolError::TtcDecode("unexpected XMLTYPE flag"))
404}
405
406pub fn decode_lob_text(bytes: &[u8], csfrm: u8, locator: Option<&[u8]>) -> Result<String> {
407    let (use_utf16, little_endian) = lob_text_uses_utf16(csfrm, locator);
408    if !use_utf16 {
409        // Validate UTF-8 in place over the borrowed bytes, then allocate the
410        // owned String once. Equivalent to `String::from_utf8(bytes.to_vec())`
411        // but without the temporary Vec that was copied, validated, and moved.
412        return core::str::from_utf8(bytes)
413            .map(str::to_owned)
414            .map_err(|_| ProtocolError::TtcDecode("invalid LOB UTF-8 text"));
415    }
416    // UTF-16 (almost always AL16UTF16 from the server for a multi-byte CLOB).
417    // An odd byte count is malformed; reject it before decoding, matching the
418    // previous `chunks_exact().remainder()` check.
419    if !bytes.len().is_multiple_of(2) {
420        return Err(ProtocolError::TtcDecode("invalid LOB UTF-16 text"));
421    }
422    // LOB text is overwhelmingly ASCII/Latin, where every UTF-16 code unit is a
423    // single ASCII byte (high byte 0, low byte < 0x80 in big-endian; the mirror
424    // in little-endian). Decode those inline — one `String::push` of a 1-byte
425    // char, no intermediate buffer — and only on the first non-ASCII or
426    // surrogate unit hand the *remaining* bytes to the general
427    // `char::decode_utf16` decoder. This skips the old intermediate `Vec<u16>`
428    // (a second large allocation filled by a separate byte-swap pass) for the
429    // common case while staying byte-for-byte identical to the previous
430    // `String::from_utf16` output, including its rejection of lone surrogates.
431    // The byte-index walk means the fallback never rescans what was already
432    // decoded, so the worst case matches the general decoder rather than
433    // doubling it.
434    let mut out = String::with_capacity(bytes.len() / 2);
435    let mut i = 0;
436    while i < bytes.len() {
437        let b0 = bytes[i];
438        let b1 = bytes[i + 1];
439        let is_ascii = if little_endian {
440            b1 == 0 && b0 < 0x80
441        } else {
442            b0 == 0 && b1 < 0x80
443        };
444        if is_ascii {
445            // The non-zero byte is the ASCII code point regardless of endianness.
446            let ascii = if little_endian { b0 } else { b1 };
447            out.push(ascii as char);
448            i += 2;
449        } else {
450            let units = bytes[i..].chunks_exact(2).map(|chunk| {
451                if little_endian {
452                    u16::from_le_bytes([chunk[0], chunk[1]])
453                } else {
454                    u16::from_be_bytes([chunk[0], chunk[1]])
455                }
456            });
457            for unit in char::decode_utf16(units) {
458                let ch = unit.map_err(|_| ProtocolError::TtcDecode("invalid LOB UTF-16 text"))?;
459                out.push(ch);
460            }
461            return Ok(out);
462        }
463    }
464    Ok(out)
465}
466
467pub fn encode_lob_text(value: &str, csfrm: u8, locator: Option<&[u8]>) -> Vec<u8> {
468    let (use_utf16, little_endian) = lob_text_uses_utf16(csfrm, locator);
469    if !use_utf16 {
470        return value.as_bytes().to_vec();
471    }
472    let mut bytes = Vec::with_capacity(value.len() * 2);
473    for unit in value.encode_utf16() {
474        let encoded = if little_endian {
475            unit.to_le_bytes()
476        } else {
477            unit.to_be_bytes()
478        };
479        bytes.extend_from_slice(&encoded);
480    }
481    bytes
482}
483
/// Recovers the `(directory, file name)` pair stored at the tail of a BFILE
/// locator.
///
/// The locator is scanned from the front for the first offset at which the
/// remaining bytes are exactly: a big-endian `u16` length, that many directory
/// bytes, a second big-endian `u16` length, and that many file-name bytes
/// ending precisely at the end of the locator. Both lengths must be non-zero
/// and both fields must be valid UTF-8.
///
/// A candidate offset that matches structurally but holds non-UTF-8 bytes is
/// treated like any other mismatch: the scan moves on to the next offset
/// instead of giving up, so a coincidental match in the binary part of the
/// locator cannot hide the real names further along.
///
/// Returns `None` when no offset qualifies (including empty or very short
/// input).
pub fn decode_bfile_locator_name(locator: &[u8]) -> Option<(String, String)> {
    (0..locator.len().saturating_sub(4))
        .find_map(|dir_len_pos| bfile_names_at(locator, dir_len_pos))
}

/// Tries to read the directory and file name starting at `dir_len_pos`.
fn bfile_names_at(locator: &[u8], dir_len_pos: usize) -> Option<(String, String)> {
    let tail = locator.get(dir_len_pos..)?;
    let (dir, after_dir) = split_u16_prefixed(tail)?;
    let (file, after_file) = split_u16_prefixed(after_dir)?;
    // The file name must run exactly to the end of the locator.
    if !after_file.is_empty() {
        return None;
    }
    let dir = std::str::from_utf8(dir).ok()?;
    let file = std::str::from_utf8(file).ok()?;
    Some((dir.to_string(), file.to_string()))
}

/// Splits off one non-empty field prefixed by a big-endian `u16` length,
/// returning the field and whatever follows it.
fn split_u16_prefixed(bytes: &[u8]) -> Option<(&[u8], &[u8])> {
    let len = usize::from(u16::from_be_bytes([*bytes.first()?, *bytes.get(1)?]));
    let body = bytes.get(2..)?;
    if len == 0 || body.len() < len {
        return None;
    }
    Some(body.split_at(len))
}
511
512pub(crate) fn lob_text_uses_utf16(csfrm: u8, locator: Option<&[u8]>) -> (bool, bool) {
513    let use_utf16 = csfrm == CS_FORM_NCHAR
514        || locator
515            .and_then(|locator| locator.get(TNS_LOB_LOC_OFFSET_FLAG_3))
516            .is_some_and(|flags| flags & TNS_LOB_LOC_FLAGS_VAR_LENGTH_CHARSET != 0);
517    let little_endian = locator
518        .and_then(|locator| locator.get(TNS_LOB_LOC_OFFSET_FLAG_4))
519        .is_some_and(|flags| flags & TNS_LOB_LOC_FLAGS_LITTLE_ENDIAN != 0);
520    (use_utf16, little_endian)
521}
522
523pub fn decode_dbobject_binary_float(bytes: &[u8]) -> Result<f32> {
524    let mut bytes: [u8; 4] = bytes
525        .try_into()
526        .map_err(|_| ProtocolError::TtcDecode("invalid DbObject BINARY_FLOAT"))?;
527    if bytes[0] & 0x80 != 0 {
528        bytes[0] &= 0x7f;
529    } else {
530        for byte in &mut bytes {
531            *byte = !*byte;
532        }
533    }
534    Ok(f32::from_bits(u32::from_be_bytes(bytes)))
535}
536
537pub fn decode_dbobject_binary_double(bytes: &[u8]) -> Result<f64> {
538    let mut bytes: [u8; 8] = bytes
539        .try_into()
540        .map_err(|_| ProtocolError::TtcDecode("invalid DbObject BINARY_DOUBLE"))?;
541    if bytes[0] & 0x80 != 0 {
542        bytes[0] &= 0x7f;
543    } else {
544        for byte in &mut bytes {
545            *byte = !*byte;
546        }
547    }
548    Ok(f64::from_bits(u64::from_be_bytes(bytes)))
549}
550
#[cfg(test)]
mod bounded_reader_tests {
    use super::*;
    use crate::wire::BoundedReader;

    // BoundedReader invariant (l2p), DbObject collection family. A packed
    // image may declare an enormous element count (long-length indicator plus
    // a ub4 of ~620M) while carrying no element bytes at all; that must never
    // turn into a gigabyte-scale Vec pre-allocation. The collection decode
    // loop itself lives in the pyshim, but the bound is structural:
    // DbObjectPackedReader exposes `remaining()` so the declared count can be
    // checked or capped against what the buffer really holds.
    #[test]
    fn dbobject_oversized_collection_count_is_bounded_by_remaining() {
        // Long-form read_length: 0xfe, then ub4 0x25000000 (~620M); no elements.
        let image = [TNS_LONG_LENGTH_INDICATOR, 0x25, 0x00, 0x00, 0x00];
        let mut reader = DbObjectPackedReader::new(&image);
        let declared = reader.read_length().expect("length decodes");
        assert_eq!(declared, 0x2500_0000);

        // Nothing is left in the buffer and every element needs at least one
        // byte, so the checked flavor has to refuse the declared count instead
        // of letting a caller reserve ~620M slots.
        let checked = reader.alloc_count_checked(declared, 1);
        assert!(
            checked.is_err(),
            "declared count must not exceed the empty remaining buffer"
        );
        // The cap-and-grow flavor caps the pre-allocation at remaining() (0).
        let reserved: Vec<u32> = reader.with_capacity_bounded(declared, 1);
        assert_eq!(
            reserved.capacity(),
            0,
            "pre-allocation must be capped by remaining"
        );
    }

    // A small, honest collection count whose elements really fit must pass
    // through unchanged (valid DbObjects are never falsely rejected).
    #[test]
    fn dbobject_legitimate_collection_count_passes() {
        // Eight unread bytes in the buffer, two declared elements.
        let image = [1u8, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11];
        let reader = DbObjectPackedReader::new(&image);
        let accepted = reader.alloc_count_checked(2, 1).expect("fits");
        assert_eq!(accepted, 2);
        let reserved: Vec<u32> = reader.with_capacity_bounded(2, 1);
        assert_eq!(reserved.capacity(), 2);
    }
}
598
#[cfg(test)]
mod decode_lob_text_tests {
    use super::*;

    /// Builds a locator whose flags select the UTF-16 decode path, with the
    /// little-endian flag set on request.
    fn utf16_locator(little_endian: bool) -> Vec<u8> {
        let mut locator = vec![0u8; 40];
        locator[TNS_LOB_LOC_OFFSET_FLAG_3] = TNS_LOB_LOC_FLAGS_VAR_LENGTH_CHARSET;
        if little_endian {
            locator[TNS_LOB_LOC_OFFSET_FLAG_4] = TNS_LOB_LOC_FLAGS_LITTLE_ENDIAN;
        }
        locator
    }

    /// Encodes `s` as UTF-16 bytes in the requested byte order.
    fn encode_utf16(s: &str, little_endian: bool) -> Vec<u8> {
        s.encode_utf16()
            .flat_map(|unit| {
                if little_endian {
                    unit.to_le_bytes()
                } else {
                    unit.to_be_bytes()
                }
            })
            .collect()
    }

    /// Straightforward collect-then-`String::from_utf16` decoder, used as the
    /// isomorphism oracle for the optimized `decode_lob_text`.
    fn reference_from_utf16(bytes: &[u8], little_endian: bool) -> Result<String> {
        let pairs = bytes.chunks_exact(2);
        if !pairs.remainder().is_empty() {
            return Err(ProtocolError::TtcDecode("invalid LOB UTF-16 text"));
        }
        let units: Vec<u16> = pairs
            .map(|pair| {
                let raw = [pair[0], pair[1]];
                if little_endian {
                    u16::from_le_bytes(raw)
                } else {
                    u16::from_be_bytes(raw)
                }
            })
            .collect();
        String::from_utf16(&units).map_err(|_| ProtocolError::TtcDecode("invalid LOB UTF-16 text"))
    }

    #[test]
    fn utf16_matches_reference_for_varied_text_both_endians() {
        let samples = [
            "",
            "a",
            "the quick brown fox 0123456789",
            "café résumé naïve",           // BMP non-ASCII (Latin-1 supplement)
            "ASCII then 漢字 then more",   // BMP CJK
            "emoji: 😀🎉 mixed with text", // surrogate pairs
            "\u{0000}\u{007f}\u{0080}\u{07ff}\u{0800}\u{ffff}", // boundary code points
        ];
        for little_endian in [false, true] {
            let loc = utf16_locator(little_endian);
            for sample in samples {
                let encoded = encode_utf16(sample, little_endian);
                let expected =
                    reference_from_utf16(&encoded, little_endian).expect("reference decode");
                let got =
                    decode_lob_text(&encoded, CS_FORM_NCHAR, Some(&loc)).expect("optimized decode");
                assert_eq!(got, expected, "sample {sample:?} le={little_endian}");
                assert_eq!(got, sample);
            }
        }
    }

    #[test]
    fn utf16_odd_length_is_rejected_like_reference() {
        // Three bytes: one complete code unit followed by a dangling byte.
        let truncated = [0x00, 0x41, 0x00];
        let loc = utf16_locator(false);
        assert!(decode_lob_text(&truncated, CS_FORM_NCHAR, Some(&loc)).is_err());
        assert!(reference_from_utf16(&truncated, false).is_err());
    }

    #[test]
    fn utf16_lone_surrogate_is_rejected_like_reference() {
        // ASCII prefix, a lone high surrogate (no low surrogate after it),
        // then more ASCII.
        let malformed = [
            encode_utf16("ok ", false),
            0xD83Du16.to_be_bytes().to_vec(),
            encode_utf16("tail", false),
        ]
        .concat();
        let loc = utf16_locator(false);
        assert!(decode_lob_text(&malformed, CS_FORM_NCHAR, Some(&loc)).is_err());
        assert!(reference_from_utf16(&malformed, false).is_err());
    }

    #[test]
    fn utf8_path_matches_from_utf8() {
        // csfrm != NCHAR and no UTF-16 locator flag -> UTF-8 path.
        let loc = vec![0u8; 40];
        let sample = "café — utf8 path ✓";
        let decoded = decode_lob_text(sample.as_bytes(), 1, Some(&loc)).expect("utf8 decode");
        assert_eq!(
            decoded,
            String::from_utf8(sample.as_bytes().to_vec()).unwrap()
        );
        assert_eq!(decoded, sample);
        // Invalid UTF-8 is an error, just as with String::from_utf8.
        let invalid = [0x66, 0x6f, 0xff, 0x6f];
        assert!(decode_lob_text(&invalid, 1, Some(&loc)).is_err());
    }
}