Skip to main content

oracledb_protocol/thin/
dbobject.rs

1#![forbid(unsafe_code)]
2
3use super::*;
4
5pub struct DbObjectPackedReader<'a> {
6    bytes: &'a [u8],
7    pos: usize,
8    limits: crate::wire::ProtocolLimits,
9}
10
11impl<'a> DbObjectPackedReader<'a> {
12    pub fn new(bytes: &'a [u8]) -> Self {
13        Self {
14            bytes,
15            pos: 0,
16            limits: crate::wire::ProtocolLimits::DEFAULT,
17        }
18    }
19
20    pub fn with_limits(bytes: &'a [u8], limits: crate::wire::ProtocolLimits) -> Result<Self> {
21        Ok(Self {
22            bytes,
23            pos: 0,
24            limits: limits.validate()?,
25        })
26    }
27
28    pub fn limits(&self) -> crate::wire::ProtocolLimits {
29        self.limits
30    }
31
32    pub fn read_u8(&mut self) -> Result<u8> {
33        let value = self
34            .bytes
35            .get(self.pos)
36            .copied()
37            .ok_or(ProtocolError::TtcDecode("truncated DbObject packed data"))?;
38        self.pos += 1;
39        Ok(value)
40    }
41
42    fn read_raw(&mut self, len: usize) -> Result<&'a [u8]> {
43        self.limits.check_response_bytes(len)?;
44        let end = self.pos.checked_add(len).ok_or(ProtocolError::TtcDecode(
45            "DbObject packed data offset overflow",
46        ))?;
47        let bytes = self
48            .bytes
49            .get(self.pos..end)
50            .ok_or(ProtocolError::TtcDecode("truncated DbObject packed data"))?;
51        self.pos = end;
52        Ok(bytes)
53    }
54
55    fn skip(&mut self, len: usize) -> Result<()> {
56        self.read_raw(len).map(|_| ())
57    }
58
59    fn read_u32be(&mut self) -> Result<u32> {
60        let bytes = self.read_raw(4)?;
61        Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
62            ProtocolError::TtcDecode("invalid DbObject u32")
63        })?))
64    }
65
66    pub fn read_i32be(&mut self) -> Result<i32> {
67        let bytes = self.read_raw(4)?;
68        Ok(i32::from_be_bytes(bytes.try_into().map_err(|_| {
69            ProtocolError::TtcDecode("invalid DbObject i32")
70        })?))
71    }
72
73    pub fn read_length(&mut self) -> Result<usize> {
74        match self.read_u8()? {
75            TNS_LONG_LENGTH_INDICATOR => usize::try_from(self.read_u32be()?)
76                .map_err(|_| ProtocolError::TtcDecode("DbObject length overflow")),
77            length => Ok(usize::from(length)),
78        }
79    }
80
81    fn skip_length(&mut self) -> Result<()> {
82        if self.read_u8()? == TNS_LONG_LENGTH_INDICATOR {
83            self.skip(4)?;
84        }
85        Ok(())
86    }
87
88    pub fn read_value_bytes(&mut self) -> Result<Option<Vec<u8>>> {
89        let length = match self.read_u8()? {
90            0 | TNS_NULL_LENGTH_INDICATOR => return Ok(None),
91            TNS_LONG_LENGTH_INDICATOR => usize::try_from(self.read_u32be()?)
92                .map_err(|_| ProtocolError::TtcDecode("DbObject value length overflow"))?,
93            length => usize::from(length),
94        };
95        Ok(Some(self.read_raw(length)?.to_vec()))
96    }
97
98    pub fn read_header(&mut self) -> Result<()> {
99        let flags = self.read_u8()?;
100        let _version = self.read_u8()?;
101        self.skip_length()?;
102        if flags & TNS_OBJ_IS_DEGENERATE != 0 {
103            return Err(ProtocolError::UnsupportedFeature(
104                "DbObject stored in a LOB",
105            ));
106        }
107        if flags & TNS_OBJ_NO_PREFIX_SEG == 0 {
108            let prefix_len = self.read_length()?;
109            self.skip(prefix_len)?;
110        }
111        Ok(())
112    }
113
114    fn bytes_left(&self) -> usize {
115        self.bytes.len().saturating_sub(self.pos)
116    }
117
118    /// Bytes still unread in the packed image. Exposed so a caller materializing
119    /// a collection (whose element count is a server-declared `read_length`) can
120    /// bound its `Vec` pre-allocation against the buffer via the
121    /// [`BoundedReader`](crate::wire::BoundedReader) trait — closing the
122    /// OOM-from-length class for DbObject collections too.
123    pub fn remaining(&self) -> usize {
124        self.bytes_left()
125    }
126
127    pub fn read_atomic_null(&mut self, is_collection_context: bool) -> Result<bool> {
128        let value = self.read_u8()?;
129        match (value, is_collection_context) {
130            (TNS_OBJ_ATOMIC_NULL, _) | (TNS_NULL_LENGTH_INDICATOR, true) => Ok(true),
131            _ => {
132                self.pos = self.pos.saturating_sub(1);
133                Ok(false)
134            }
135        }
136    }
137}
138
139impl crate::wire::BoundedReader for DbObjectPackedReader<'_> {
140    fn remaining(&self) -> usize {
141        self.bytes_left()
142    }
143
144    fn protocol_limits(&self) -> crate::wire::ProtocolLimits {
145        self.limits
146    }
147}
148
149/// Writes a length-prefixed value into a DbObject pickle image buffer using the
150/// inner-buffer scheme (252 short cutoff, 32767 chunks for the long form). This
151/// mirrors `Buffer.write_bytes_with_length` used by `_pack_value`
152/// (reference impl/thin/packet.pyx) — NOT the 245-cutoff `write_length`.
153pub fn image_write_value_bytes(buf: &mut Vec<u8>, value: &[u8]) -> Result<()> {
154    if value.len() <= crate::wire::TNS_MAX_SHORT_LENGTH {
155        buf.push(value.len() as u8);
156        buf.extend_from_slice(value);
157        return Ok(());
158    }
159    buf.push(TNS_LONG_LENGTH_INDICATOR);
160    for chunk in value.chunks(32_767) {
161        image_write_ub4(
162            buf,
163            u32::try_from(chunk.len()).map_err(|_| ProtocolError::InvalidPacketLength {
164                length: chunk.len(),
165                minimum: 0,
166            })?,
167        );
168        buf.extend_from_slice(chunk);
169    }
170    image_write_ub4(buf, 0);
171    Ok(())
172}
173
/// Appends a `ub4` to a pickle image buffer (reference `write_ub4`).
///
/// The encoding is a size byte followed by that many big-endian value bytes:
/// `0` alone for zero, `1` + one byte up to 255, `2` + two bytes up to 65535,
/// and `4` + four bytes otherwise.
pub(crate) fn image_write_ub4(buf: &mut Vec<u8>, value: u32) {
    if let Ok(narrow) = u8::try_from(value) {
        if narrow == 0 {
            buf.push(0);
        } else {
            buf.extend_from_slice(&[1, narrow]);
        }
    } else if let Ok(medium) = u16::try_from(value) {
        buf.push(2);
        buf.extend_from_slice(&medium.to_be_bytes());
    } else {
        buf.push(4);
        buf.extend_from_slice(&value.to_be_bytes());
    }
}
189
190/// Writes a collection/element count length into a pickle image buffer using
191/// the 245-cutoff scheme (reference `DbObjectPickleBuffer.write_length`).
192pub fn image_write_length(buf: &mut Vec<u8>, length: usize) -> Result<()> {
193    if length <= TNS_OBJ_MAX_SHORT_LENGTH {
194        buf.push(length as u8);
195    } else {
196        buf.push(TNS_LONG_LENGTH_INDICATOR);
197        buf.extend_from_slice(
198            &u32::try_from(length)
199                .map_err(|_| ProtocolError::InvalidPacketLength { length, minimum: 0 })?
200                .to_be_bytes(),
201        );
202    }
203    Ok(())
204}
205
206/// Builds the pickle image header (reference `write_header` + image_flags from
207/// `create_new_object`). Returns the buffer pre-seeded with the header; the
208/// caller appends the body and then calls [`image_finalize`] to back-patch the
209/// total size (4-byte BE at offset 3).
210pub fn image_begin(is_collection: bool) -> Vec<u8> {
211    let mut image_flags = TNS_OBJ_IS_VERSION_81;
212    if is_collection {
213        image_flags |= TNS_OBJ_IS_COLLECTION;
214    } else {
215        image_flags |= TNS_OBJ_NO_PREFIX_SEG;
216    }
217    let mut buf = Vec::new();
218    buf.push(image_flags);
219    buf.push(TNS_OBJ_IMAGE_VERSION);
220    buf.push(TNS_LONG_LENGTH_INDICATOR);
221    buf.extend_from_slice(&0u32.to_be_bytes()); // size placeholder (offset 3)
222    if is_collection {
223        buf.push(1); // length of prefix segment
224        buf.push(1); // prefix segment contents
225    }
226    buf
227}
228
229/// Back-patches the total image size (reference `_get_packed_data`: the 4-byte
230/// BE size at offset 3, after flags + version + 0xFE).
231pub fn image_finalize(buf: &mut [u8]) -> Result<()> {
232    let size = u32::try_from(buf.len()).map_err(|_| ProtocolError::InvalidPacketLength {
233        length: buf.len(),
234        minimum: 0,
235    })?;
236    let slot = buf.get_mut(3..7).ok_or(ProtocolError::TtcDecode(
237        "DbObject image too short to finalize",
238    ))?;
239    slot.copy_from_slice(&size.to_be_bytes());
240    Ok(())
241}
242
243/// Collection flags byte written at the start of a collection body
244/// (`TNS_OBJ_HAS_INDEXES` for associative arrays, else 0). Reference
245/// `_parse_tds` collection_flags + `_pack_data`.
246pub fn collection_flags_for(is_assoc_array: bool) -> u8 {
247    if is_assoc_array {
248        TNS_OBJ_HAS_INDEXES
249    } else {
250        0
251    }
252}
253
254/// Writes a NULL element/attribute marker into the image. Non-collection object
255/// attributes use `TNS_OBJ_ATOMIC_NULL` (253); scalars and collection elements
256/// use `TNS_NULL_LENGTH_INDICATOR` (255). Reference `_pack_value` None branch.
257pub fn image_write_null(buf: &mut Vec<u8>, atomic_null: bool) {
258    if atomic_null {
259        buf.push(TNS_OBJ_ATOMIC_NULL);
260    } else {
261        buf.push(TNS_NULL_LENGTH_INDICATOR);
262    }
263}
264
265/// Packs a single scalar `BindValue` into a DbObject pickle image buffer,
266/// mirroring `_pack_value` (reference impl/thin/dbobject.pyx:247-306). Object
267/// (nested) and Null/Array values are handled by the caller (the pyshim owns
268/// the recursion and null framing); this serves scalar attributes and
269/// collection elements only.
270pub fn pack_bindvalue_into_image(buf: &mut Vec<u8>, value: &BindValue, csfrm: u8) -> Result<()> {
271    match value {
272        BindValue::Text(text) => {
273            let bytes = encode_text_value(text, csfrm);
274            image_write_value_bytes(buf, &bytes)
275        }
276        BindValue::Raw(bytes) => image_write_value_bytes(buf, bytes),
277        BindValue::Number(text) => {
278            let bytes = encode_number_text(text)?;
279            image_write_value_bytes(buf, &bytes)
280        }
281        // PLS_INTEGER / BINARY_INTEGER pack as uint8(4) + uint32be (NOT Oracle
282        // number text) inside an object image.
283        BindValue::BinaryInteger(text) => {
284            let value = parse_binary_integer_u32(text)?;
285            buf.push(4);
286            buf.extend_from_slice(&value.to_be_bytes());
287            Ok(())
288        }
289        // BOOLEAN inside an image is the 4-byte form, NOT [1,1]/[0].
290        BindValue::Boolean(value) => {
291            buf.push(4);
292            buf.extend_from_slice(&u32::from(*value).to_be_bytes());
293            Ok(())
294        }
295        BindValue::BinaryDouble(value) => {
296            let bytes = encode_binary_double(*value);
297            image_write_value_bytes(buf, &bytes)
298        }
299        BindValue::BinaryFloat(value) => {
300            let bytes = encode_binary_float(*value as f32);
301            image_write_value_bytes(buf, &bytes)
302        }
303        BindValue::DateTime {
304            year,
305            month,
306            day,
307            hour,
308            minute,
309            second,
310        } => {
311            let bytes = encode_oracle_date(*year, *month, *day, *hour, *minute, *second)?;
312            image_write_value_bytes(buf, &bytes)
313        }
314        BindValue::Timestamp {
315            year,
316            month,
317            day,
318            hour,
319            minute,
320            second,
321            nanosecond,
322            ora_type_num,
323        } => {
324            let bytes = if matches!(*ora_type_num, ORA_TYPE_NUM_TIMESTAMP_TZ) {
325                encode_oracle_timestamp_tz(
326                    *year,
327                    *month,
328                    *day,
329                    *hour,
330                    *minute,
331                    *second,
332                    *nanosecond,
333                )?
334            } else {
335                encode_oracle_timestamp(*year, *month, *day, *hour, *minute, *second, *nanosecond)?
336            };
337            image_write_value_bytes(buf, &bytes)
338        }
339        BindValue::Lob { locator, .. } => image_write_value_bytes(buf, locator),
340        BindValue::IntervalDS {
341            days,
342            seconds,
343            microseconds,
344        } => {
345            let bytes = encode_interval_ds(*days, *seconds, *microseconds)?;
346            image_write_value_bytes(buf, &bytes)
347        }
348        BindValue::IntervalYM { years, months } => {
349            let bytes = encode_interval_ym(*years, *months)?;
350            image_write_value_bytes(buf, &bytes)
351        }
352        BindValue::Null => {
353            image_write_null(buf, false);
354            Ok(())
355        }
356        _ => Err(ProtocolError::UnsupportedFeature(
357            "DbObject attribute type not supported for input binding",
358        )),
359    }
360}
361
362pub(crate) fn parse_binary_integer_u32(text: &str) -> Result<u32> {
363    let trimmed = text.trim();
364    let parsed: i64 = trimmed
365        .parse()
366        .map_err(|_| ProtocolError::TtcDecode("invalid BINARY_INTEGER value"))?;
367    Ok(parsed as u32)
368}
369
370/// Frames a fully-packed DbObject pickle `image` into the outgoing data row,
371/// replacing the zero stub used for empty OUT binds. Mirrors
372/// `WriteBuffer.write_dbobject` (reference impl/thin/packet.pyx:842-857). The
373/// `toid` is derived from the type `oid` per `create_new_object` (620-622).
374pub fn write_dbobject_bind(writer: &mut TtcWriter, oid: &[u8], image: &[u8]) -> Result<()> {
375    let mut toid = Vec::with_capacity(4 + oid.len() + TNS_EXTENT_OID.len());
376    toid.extend_from_slice(&[0x00, 0x22, TNS_OBJ_NON_NULL_OID, TNS_OBJ_HAS_EXTENT_OID]);
377    toid.extend_from_slice(oid);
378    toid.extend_from_slice(&TNS_EXTENT_OID);
379    writer.write_bytes_with_two_lengths(Some(&toid))?;
380    writer.write_bytes_with_two_lengths(Some(oid))?;
381    writer.write_ub4(0); // snapshot
382    writer.write_ub4(0); // version
383    writer.write_ub4(u32::try_from(image.len()).map_err(|_| {
384        ProtocolError::InvalidPacketLength {
385            length: image.len(),
386            minimum: 0,
387        }
388    })?);
389    writer.write_ub4(TNS_OBJ_TOP_LEVEL);
390    writer.write_bytes_with_length(image)
391}
392
393pub fn decode_dbobject_text(bytes: &[u8], dbtype_name: &str) -> Result<String> {
394    if matches!(dbtype_name, "DB_TYPE_NCHAR" | "DB_TYPE_NVARCHAR") {
395        let mut chunks = bytes.chunks_exact(2);
396        let units = chunks
397            .by_ref()
398            .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]]))
399            .collect::<Vec<_>>();
400        if !chunks.remainder().is_empty() {
401            return Err(ProtocolError::TtcDecode("invalid DbObject UTF-16 text"));
402        }
403        return String::from_utf16(&units)
404            .map_err(|_| ProtocolError::TtcDecode("invalid DbObject UTF-16 text"));
405    }
406    String::from_utf8(bytes.to_vec())
407        .map_err(|_| ProtocolError::TtcDecode("invalid DbObject UTF-8 text"))
408}
409
410pub fn decode_dbobject_xmltype_text(bytes: &[u8]) -> Result<Option<String>> {
411    let mut reader = DbObjectPackedReader::new(bytes);
412    reader.read_header()?;
413    reader.skip(1)?;
414    let xml_flag = reader.read_u32be()?;
415    if xml_flag & TNS_XML_TYPE_FLAG_SKIP_NEXT_4 != 0 {
416        reader.skip(4)?;
417    }
418    let bytes = reader.read_raw(reader.bytes_left())?;
419    if xml_flag & TNS_XML_TYPE_STRING != 0 {
420        return decode_dbobject_text(bytes, "DB_TYPE_VARCHAR").map(Some);
421    }
422    if xml_flag & TNS_XML_TYPE_LOB != 0 {
423        return Ok(None);
424    }
425    Err(ProtocolError::TtcDecode("unexpected XMLTYPE flag"))
426}
427
428pub fn decode_lob_text(bytes: &[u8], csfrm: u8, locator: Option<&[u8]>) -> Result<String> {
429    let (use_utf16, little_endian) = lob_text_uses_utf16(csfrm, locator);
430    if !use_utf16 {
431        // Validate UTF-8 in place over the borrowed bytes, then allocate the
432        // owned String once. Equivalent to `String::from_utf8(bytes.to_vec())`
433        // but without the temporary Vec that was copied, validated, and moved.
434        return core::str::from_utf8(bytes)
435            .map(str::to_owned)
436            .map_err(|_| ProtocolError::TtcDecode("invalid LOB UTF-8 text"));
437    }
438    // UTF-16 (almost always AL16UTF16 from the server for a multi-byte CLOB).
439    // An odd byte count is malformed; reject it before decoding, matching the
440    // previous `chunks_exact().remainder()` check.
441    if !bytes.len().is_multiple_of(2) {
442        return Err(ProtocolError::TtcDecode("invalid LOB UTF-16 text"));
443    }
444    // LOB text is overwhelmingly ASCII/Latin, where every UTF-16 code unit is a
445    // single ASCII byte (high byte 0, low byte < 0x80 in big-endian; the mirror
446    // in little-endian). Decode those inline — one `String::push` of a 1-byte
447    // char, no intermediate buffer — and only on the first non-ASCII or
448    // surrogate unit hand the *remaining* bytes to the general
449    // `char::decode_utf16` decoder. This skips the old intermediate `Vec<u16>`
450    // (a second large allocation filled by a separate byte-swap pass) for the
451    // common case while staying byte-for-byte identical to the previous
452    // `String::from_utf16` output, including its rejection of lone surrogates.
453    // The byte-index walk means the fallback never rescans what was already
454    // decoded, so the worst case matches the general decoder rather than
455    // doubling it.
456    let mut out = String::with_capacity(bytes.len() / 2);
457    let mut i = 0;
458    while i < bytes.len() {
459        let b0 = bytes[i];
460        let b1 = bytes[i + 1];
461        let is_ascii = if little_endian {
462            b1 == 0 && b0 < 0x80
463        } else {
464            b0 == 0 && b1 < 0x80
465        };
466        if is_ascii {
467            // The non-zero byte is the ASCII code point regardless of endianness.
468            let ascii = if little_endian { b0 } else { b1 };
469            out.push(ascii as char);
470            i += 2;
471        } else {
472            let units = bytes[i..].chunks_exact(2).map(|chunk| {
473                if little_endian {
474                    u16::from_le_bytes([chunk[0], chunk[1]])
475                } else {
476                    u16::from_be_bytes([chunk[0], chunk[1]])
477                }
478            });
479            for unit in char::decode_utf16(units) {
480                let ch = unit.map_err(|_| ProtocolError::TtcDecode("invalid LOB UTF-16 text"))?;
481                out.push(ch);
482            }
483            return Ok(out);
484        }
485    }
486    Ok(out)
487}
488
489pub fn encode_lob_text(value: &str, csfrm: u8, locator: Option<&[u8]>) -> Vec<u8> {
490    let (use_utf16, little_endian) = lob_text_uses_utf16(csfrm, locator);
491    if !use_utf16 {
492        return value.as_bytes().to_vec();
493    }
494    let mut bytes = Vec::with_capacity(value.len() * 2);
495    for unit in value.encode_utf16() {
496        let encoded = if little_endian {
497            unit.to_le_bytes()
498        } else {
499            unit.to_be_bytes()
500        };
501        bytes.extend_from_slice(&encoded);
502    }
503    bytes
504}
505
/// Recovers the `(directory, file)` name pair stored at the tail of a BFILE
/// locator.
///
/// The locator is scanned for the first offset at which the remaining bytes
/// form exactly `u16 BE dir_len | dir | u16 BE file_len | file`, with both
/// lengths non-zero, the file name ending on the last byte of the locator,
/// and both names valid UTF-8.
///
/// A candidate that matches the layout but is not valid UTF-8 is skipped and
/// the scan continues. Previously such a candidate ended the whole search, so
/// binary header bytes that happened to look like a length pair hid a real
/// name later in the locator.
///
/// Returns `None` when no offset qualifies.
pub fn decode_bfile_locator_name(locator: &[u8]) -> Option<(String, String)> {
    /// Splits `bytes` into a non-empty body announced by a big-endian `u16`
    /// length prefix and whatever follows that body.
    fn split_u16_prefixed(bytes: &[u8]) -> Option<(&[u8], &[u8])> {
        let prefix = bytes.get(..2)?;
        let len = usize::from(u16::from_be_bytes([prefix[0], prefix[1]]));
        let body = &bytes[2..];
        if len == 0 || len > body.len() {
            return None;
        }
        Some(body.split_at(len))
    }

    /// Tries to read a complete name pair starting at offset `start`.
    fn name_pair_at(locator: &[u8], start: usize) -> Option<(String, String)> {
        let (dir, rest) = split_u16_prefixed(locator.get(start..)?)?;
        let (file, trailing) = split_u16_prefixed(rest)?;
        // The file name must end exactly at the end of the locator.
        if !trailing.is_empty() {
            return None;
        }
        let dir = std::str::from_utf8(dir).ok()?;
        let file = std::str::from_utf8(file).ok()?;
        Some((dir.to_owned(), file.to_owned()))
    }

    (0..locator.len().saturating_sub(4)).find_map(|start| name_pair_at(locator, start))
}
533
534pub(crate) fn lob_text_uses_utf16(csfrm: u8, locator: Option<&[u8]>) -> (bool, bool) {
535    let use_utf16 = csfrm == CS_FORM_NCHAR
536        || locator
537            .and_then(|locator| locator.get(TNS_LOB_LOC_OFFSET_FLAG_3))
538            .is_some_and(|flags| flags & TNS_LOB_LOC_FLAGS_VAR_LENGTH_CHARSET != 0);
539    let little_endian = locator
540        .and_then(|locator| locator.get(TNS_LOB_LOC_OFFSET_FLAG_4))
541        .is_some_and(|flags| flags & TNS_LOB_LOC_FLAGS_LITTLE_ENDIAN != 0);
542    (use_utf16, little_endian)
543}
544
545pub fn decode_dbobject_binary_float(bytes: &[u8]) -> Result<f32> {
546    let mut bytes: [u8; 4] = bytes
547        .try_into()
548        .map_err(|_| ProtocolError::TtcDecode("invalid DbObject BINARY_FLOAT"))?;
549    if bytes[0] & 0x80 != 0 {
550        bytes[0] &= 0x7f;
551    } else {
552        for byte in &mut bytes {
553            *byte = !*byte;
554        }
555    }
556    Ok(f32::from_bits(u32::from_be_bytes(bytes)))
557}
558
559pub fn decode_dbobject_binary_double(bytes: &[u8]) -> Result<f64> {
560    let mut bytes: [u8; 8] = bytes
561        .try_into()
562        .map_err(|_| ProtocolError::TtcDecode("invalid DbObject BINARY_DOUBLE"))?;
563    if bytes[0] & 0x80 != 0 {
564        bytes[0] &= 0x7f;
565    } else {
566        for byte in &mut bytes {
567            *byte = !*byte;
568        }
569    }
570    Ok(f64::from_bits(u64::from_be_bytes(bytes)))
571}
572
// Tests that `DbObjectPackedReader`, through the `BoundedReader` trait, keeps
// server-declared element counts from driving oversized pre-allocations.
#[cfg(test)]
mod bounded_reader_tests {
    use super::*;
    use crate::wire::BoundedReader;

    // BoundedReader invariant (l2p), DbObject collection family: a packed image
    // declaring a huge collection element count (via the long-length indicator
    // + a ub4 ~620M) but carrying no element bytes must NOT drive a
    // gigabyte-scale Vec pre-allocation. The collection decode loop lives in the
    // pyshim, but the bound is structural: DbObjectPackedReader exposes
    // `remaining()` so the count can be checked/capped against the buffer.
    #[test]
    fn dbobject_oversized_collection_count_is_bounded_by_remaining() {
        // read_length long form: 0xfe then ub4 0x25000000 (~620M), no elements.
        let bytes = [TNS_LONG_LENGTH_INDICATOR, 0x25, 0x00, 0x00, 0x00];
        let mut reader = DbObjectPackedReader::new(&bytes);
        let num_elements = reader.read_length().expect("length decodes");
        assert_eq!(num_elements, 0x2500_0000);

        // Only the (now zero) remaining bytes can be honestly allocated: an
        // element needs at least one byte, so alloc_count_checked must reject
        // the lie rather than letting a caller reserve ~620M slots.
        assert!(
            reader.alloc_count_checked(num_elements, 1).is_err(),
            "declared count must not exceed the empty remaining buffer"
        );
        // The cap-and-grow flavor caps the pre-allocation at remaining() (0).
        let v: Vec<u32> = reader.with_capacity_bounded(num_elements, 1);
        assert_eq!(
            v.capacity(),
            0,
            "pre-allocation must be capped by remaining"
        );
    }

    // A legitimate small collection count whose elements really fit passes
    // through unchanged (no false rejection of valid DbObjects).
    #[test]
    fn dbobject_legitimate_collection_count_passes() {
        // 8 bytes of element payload remaining, two declared elements.
        let bytes = [1u8, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11];
        let reader = DbObjectPackedReader::new(&bytes);
        assert_eq!(reader.alloc_count_checked(2, 1).expect("fits"), 2);
        let v: Vec<u32> = reader.with_capacity_bounded(2, 1);
        assert_eq!(v.capacity(), 2);
    }
}
620
// Tests that the optimized `decode_lob_text` agrees with a straightforward
// `String::from_utf16` / `String::from_utf8` decoder on valid and invalid input.
#[cfg(test)]
mod decode_lob_text_tests {
    use super::*;

    /// A locator that drives the UTF-16 decode path, with selectable endianness.
    fn utf16_locator(little_endian: bool) -> Vec<u8> {
        let mut loc = vec![0u8; 40];
        loc[TNS_LOB_LOC_OFFSET_FLAG_3] = TNS_LOB_LOC_FLAGS_VAR_LENGTH_CHARSET;
        if little_endian {
            loc[TNS_LOB_LOC_OFFSET_FLAG_4] = TNS_LOB_LOC_FLAGS_LITTLE_ENDIAN;
        }
        loc
    }

    /// Encodes `s` as UTF-16 bytes in the requested byte order.
    fn encode_utf16(s: &str, little_endian: bool) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(s.len() * 2);
        for unit in s.encode_utf16() {
            let pair = if little_endian {
                unit.to_le_bytes()
            } else {
                unit.to_be_bytes()
            };
            bytes.extend_from_slice(&pair);
        }
        bytes
    }

    /// Reference decoder = the previous implementation, used as the isomorphism
    /// oracle for the optimized `decode_lob_text`.
    fn reference_from_utf16(bytes: &[u8], little_endian: bool) -> Result<String> {
        let mut chunks = bytes.chunks_exact(2);
        let units = chunks
            .by_ref()
            .map(|chunk| {
                if little_endian {
                    u16::from_le_bytes([chunk[0], chunk[1]])
                } else {
                    u16::from_be_bytes([chunk[0], chunk[1]])
                }
            })
            .collect::<Vec<_>>();
        if !chunks.remainder().is_empty() {
            return Err(ProtocolError::TtcDecode("invalid LOB UTF-16 text"));
        }
        String::from_utf16(&units).map_err(|_| ProtocolError::TtcDecode("invalid LOB UTF-16 text"))
    }

    // Every sample must round-trip and match the reference decoder in both
    // byte orders.
    #[test]
    fn utf16_matches_reference_for_varied_text_both_endians() {
        let samples = [
            "",
            "a",
            "the quick brown fox 0123456789",
            "café résumé naïve",           // BMP non-ASCII (Latin-1 supplement)
            "ASCII then 漢字 then more",   // BMP CJK
            "emoji: 😀🎉 mixed with text", // surrogate pairs
            "\u{0000}\u{007f}\u{0080}\u{07ff}\u{0800}\u{ffff}", // boundary code points
        ];
        for sample in samples {
            for little_endian in [false, true] {
                let bytes = encode_utf16(sample, little_endian);
                let loc = utf16_locator(little_endian);
                let got =
                    decode_lob_text(&bytes, CS_FORM_NCHAR, Some(&loc)).expect("optimized decode");
                let expected =
                    reference_from_utf16(&bytes, little_endian).expect("reference decode");
                assert_eq!(got, expected, "sample {sample:?} le={little_endian}");
                assert_eq!(got, sample);
            }
        }
    }

    #[test]
    fn utf16_odd_length_is_rejected_like_reference() {
        let loc = utf16_locator(false);
        // 3 bytes: one full unit plus a dangling byte.
        let bytes = [0x00, 0x41, 0x00];
        assert!(decode_lob_text(&bytes, CS_FORM_NCHAR, Some(&loc)).is_err());
        assert!(reference_from_utf16(&bytes, false).is_err());
    }

    #[test]
    fn utf16_lone_surrogate_is_rejected_like_reference() {
        let loc = utf16_locator(false);
        // ASCII prefix then a lone high surrogate (no following low surrogate).
        let mut bytes = encode_utf16("ok ", false);
        bytes.extend_from_slice(&0xD83Du16.to_be_bytes());
        bytes.extend_from_slice(&encode_utf16("tail", false));
        assert!(decode_lob_text(&bytes, CS_FORM_NCHAR, Some(&loc)).is_err());
        assert!(reference_from_utf16(&bytes, false).is_err());
    }

    #[test]
    fn utf8_path_matches_from_utf8() {
        // csfrm != NCHAR and no UTF-16 locator flag -> UTF-8 path.
        let loc = vec![0u8; 40];
        let sample = "café — utf8 path ✓";
        let bytes = sample.as_bytes();
        let got = decode_lob_text(bytes, 1, Some(&loc)).expect("utf8 decode");
        assert_eq!(got, String::from_utf8(bytes.to_vec()).unwrap());
        assert_eq!(got, sample);
        // invalid UTF-8 errors like String::from_utf8.
        let bad = [0x66, 0x6f, 0xff, 0x6f];
        assert!(decode_lob_text(&bad, 1, Some(&loc)).is_err());
    }
}
726}