Skip to main content

oracledb_protocol/thin/
dbobject.rs

1#![forbid(unsafe_code)]
2
3use super::*;
4
/// Forward-only reader over the packed byte image of a DbObject.
///
/// The reader borrows the image for `'a` and tracks its own read position;
/// slices handed out by the private raw-read helper borrow from the image,
/// not from the reader.
pub struct DbObjectPackedReader<'a> {
    /// The packed image being decoded.
    bytes: &'a [u8],
    /// Offset into `bytes` of the next unread byte.
    pos: usize,
    /// Protocol limits checked before every raw multi-byte read.
    limits: crate::wire::ProtocolLimits,
}
10
11impl<'a> DbObjectPackedReader<'a> {
12    pub fn new(bytes: &'a [u8]) -> Self {
13        Self {
14            bytes,
15            pos: 0,
16            limits: crate::wire::ProtocolLimits::DEFAULT,
17        }
18    }
19
20    pub fn with_limits(bytes: &'a [u8], limits: crate::wire::ProtocolLimits) -> Result<Self> {
21        Ok(Self {
22            bytes,
23            pos: 0,
24            limits: limits.validate()?,
25        })
26    }
27
28    pub fn limits(&self) -> crate::wire::ProtocolLimits {
29        self.limits
30    }
31
32    pub fn read_u8(&mut self) -> Result<u8> {
33        let value = self
34            .bytes
35            .get(self.pos)
36            .copied()
37            .ok_or(ProtocolError::TtcDecode("truncated DbObject packed data"))?;
38        self.pos += 1;
39        Ok(value)
40    }
41
42    fn read_raw(&mut self, len: usize) -> Result<&'a [u8]> {
43        self.limits.check_response_bytes(len)?;
44        let end = self.pos.checked_add(len).ok_or(ProtocolError::TtcDecode(
45            "DbObject packed data offset overflow",
46        ))?;
47        let bytes = self
48            .bytes
49            .get(self.pos..end)
50            .ok_or(ProtocolError::TtcDecode("truncated DbObject packed data"))?;
51        self.pos = end;
52        Ok(bytes)
53    }
54
55    fn skip(&mut self, len: usize) -> Result<()> {
56        self.read_raw(len).map(|_| ())
57    }
58
59    fn read_u32be(&mut self) -> Result<u32> {
60        let bytes = self.read_raw(4)?;
61        Ok(u32::from_be_bytes(bytes.try_into().map_err(|_| {
62            ProtocolError::TtcDecode("invalid DbObject u32")
63        })?))
64    }
65
66    pub fn read_i32be(&mut self) -> Result<i32> {
67        let bytes = self.read_raw(4)?;
68        Ok(i32::from_be_bytes(bytes.try_into().map_err(|_| {
69            ProtocolError::TtcDecode("invalid DbObject i32")
70        })?))
71    }
72
73    pub fn read_length(&mut self) -> Result<usize> {
74        match self.read_u8()? {
75            TNS_LONG_LENGTH_INDICATOR => usize::try_from(self.read_u32be()?)
76                .map_err(|_| ProtocolError::TtcDecode("DbObject length overflow")),
77            length => Ok(usize::from(length)),
78        }
79    }
80
81    fn skip_length(&mut self) -> Result<()> {
82        if self.read_u8()? == TNS_LONG_LENGTH_INDICATOR {
83            self.skip(4)?;
84        }
85        Ok(())
86    }
87
88    pub fn read_value_bytes(&mut self) -> Result<Option<Vec<u8>>> {
89        let length = match self.read_u8()? {
90            TNS_NULL_LENGTH_INDICATOR => return Ok(None),
91            TNS_LONG_LENGTH_INDICATOR => usize::try_from(self.read_u32be()?)
92                .map_err(|_| ProtocolError::TtcDecode("DbObject value length overflow"))?,
93            length => usize::from(length),
94        };
95        Ok(Some(self.read_raw(length)?.to_vec()))
96    }
97
98    pub fn read_header(&mut self) -> Result<()> {
99        let flags = self.read_u8()?;
100        let _version = self.read_u8()?;
101        self.skip_length()?;
102        if flags & TNS_OBJ_IS_DEGENERATE != 0 {
103            return Err(ProtocolError::UnsupportedFeature(
104                "DbObject stored in a LOB",
105            ));
106        }
107        if flags & TNS_OBJ_NO_PREFIX_SEG == 0 {
108            let prefix_len = self.read_length()?;
109            self.skip(prefix_len)?;
110        }
111        Ok(())
112    }
113
114    fn bytes_left(&self) -> usize {
115        self.bytes.len().saturating_sub(self.pos)
116    }
117
118    /// Bytes still unread in the packed image. Exposed so a caller materializing
119    /// a collection (whose element count is a server-declared `read_length`) can
120    /// bound its `Vec` pre-allocation against the buffer via the
121    /// [`BoundedReader`](crate::wire::BoundedReader) trait — closing the
122    /// OOM-from-length class for DbObject collections too.
123    pub fn remaining(&self) -> usize {
124        self.bytes_left()
125    }
126
127    pub fn read_atomic_null(&mut self, is_collection_context: bool) -> Result<bool> {
128        let value = self.read_u8()?;
129        match (value, is_collection_context) {
130            (TNS_OBJ_ATOMIC_NULL, _) | (TNS_NULL_LENGTH_INDICATOR, true) => Ok(true),
131            _ => {
132                self.pos = self.pos.saturating_sub(1);
133                Ok(false)
134            }
135        }
136    }
137}
138
139impl crate::wire::BoundedReader for DbObjectPackedReader<'_> {
140    fn remaining(&self) -> usize {
141        self.bytes_left()
142    }
143
144    fn protocol_limits(&self) -> crate::wire::ProtocolLimits {
145        self.limits
146    }
147}
148
149/// Writes a length-prefixed value into a DbObject pickle image buffer using the
150/// DbObject image `write_length` format: 245-byte short cutoff, or `0xfe`
151/// followed by one big-endian `uint32` length and the full value bytes.
152pub fn image_write_value_bytes(buf: &mut Vec<u8>, value: &[u8]) -> Result<()> {
153    if value.len() <= TNS_OBJ_MAX_SHORT_LENGTH {
154        buf.push(value.len() as u8);
155        buf.extend_from_slice(value);
156        return Ok(());
157    }
158    let length = u32::try_from(value.len()).map_err(|_| ProtocolError::InvalidPacketLength {
159        length: value.len(),
160        minimum: 0,
161    })?;
162    buf.push(TNS_LONG_LENGTH_INDICATOR);
163    buf.extend_from_slice(&length.to_be_bytes());
164    buf.extend_from_slice(value);
165    Ok(())
166}
167
168/// Writes a collection/element count length into a pickle image buffer using
169/// the 245-cutoff scheme (reference `DbObjectPickleBuffer.write_length`).
170pub fn image_write_length(buf: &mut Vec<u8>, length: usize) -> Result<()> {
171    if length <= TNS_OBJ_MAX_SHORT_LENGTH {
172        buf.push(length as u8);
173    } else {
174        buf.push(TNS_LONG_LENGTH_INDICATOR);
175        buf.extend_from_slice(
176            &u32::try_from(length)
177                .map_err(|_| ProtocolError::InvalidPacketLength { length, minimum: 0 })?
178                .to_be_bytes(),
179        );
180    }
181    Ok(())
182}
183
184/// Builds the pickle image header (reference `write_header` + image_flags from
185/// `create_new_object`). Returns the buffer pre-seeded with the header; the
186/// caller appends the body and then calls [`image_finalize`] to back-patch the
187/// total size (4-byte BE at offset 3).
188pub fn image_begin(is_collection: bool) -> Vec<u8> {
189    let mut image_flags = TNS_OBJ_IS_VERSION_81;
190    if is_collection {
191        image_flags |= TNS_OBJ_IS_COLLECTION;
192    } else {
193        image_flags |= TNS_OBJ_NO_PREFIX_SEG;
194    }
195    let mut buf = Vec::new();
196    buf.push(image_flags);
197    buf.push(TNS_OBJ_IMAGE_VERSION);
198    buf.push(TNS_LONG_LENGTH_INDICATOR);
199    buf.extend_from_slice(&0u32.to_be_bytes()); // size placeholder (offset 3)
200    if is_collection {
201        buf.push(1); // length of prefix segment
202        buf.push(1); // prefix segment contents
203    }
204    buf
205}
206
207/// Back-patches the total image size (reference `_get_packed_data`: the 4-byte
208/// BE size at offset 3, after flags + version + 0xFE).
209pub fn image_finalize(buf: &mut [u8]) -> Result<()> {
210    let size = u32::try_from(buf.len()).map_err(|_| ProtocolError::InvalidPacketLength {
211        length: buf.len(),
212        minimum: 0,
213    })?;
214    let slot = buf.get_mut(3..7).ok_or(ProtocolError::TtcDecode(
215        "DbObject image too short to finalize",
216    ))?;
217    slot.copy_from_slice(&size.to_be_bytes());
218    Ok(())
219}
220
221/// Collection flags byte written at the start of a collection body
222/// (`TNS_OBJ_HAS_INDEXES` for associative arrays, else 0). Reference
223/// `_parse_tds` collection_flags + `_pack_data`.
224pub fn collection_flags_for(is_assoc_array: bool) -> u8 {
225    if is_assoc_array {
226        TNS_OBJ_HAS_INDEXES
227    } else {
228        0
229    }
230}
231
232/// Writes a NULL element/attribute marker into the image. Non-collection object
233/// attributes use `TNS_OBJ_ATOMIC_NULL` (253); scalars and collection elements
234/// use `TNS_NULL_LENGTH_INDICATOR` (255). Reference `_pack_value` None branch.
235pub fn image_write_null(buf: &mut Vec<u8>, atomic_null: bool) {
236    if atomic_null {
237        buf.push(TNS_OBJ_ATOMIC_NULL);
238    } else {
239        buf.push(TNS_NULL_LENGTH_INDICATOR);
240    }
241}
242
243/// Packs a single scalar `BindValue` into a DbObject pickle image buffer,
244/// mirroring `_pack_value` (reference impl/thin/dbobject.pyx:247-306). Object
245/// (nested) and Null/Array values are handled by the caller (the pyshim owns
246/// the recursion and null framing); this serves scalar attributes and
247/// collection elements only.
248pub fn pack_bindvalue_into_image(buf: &mut Vec<u8>, value: &BindValue, csfrm: u8) -> Result<()> {
249    match value {
250        BindValue::Text(text) => {
251            let bytes = encode_text_value(text, csfrm);
252            image_write_value_bytes(buf, &bytes)
253        }
254        BindValue::Raw(bytes) => image_write_value_bytes(buf, bytes),
255        BindValue::Number(text) => {
256            let bytes = encode_number_text(text)?;
257            image_write_value_bytes(buf, &bytes)
258        }
259        // PLS_INTEGER / BINARY_INTEGER pack as uint8(4) + uint32be (NOT Oracle
260        // number text) inside an object image.
261        BindValue::BinaryInteger(text) => {
262            let value = parse_binary_integer_u32(text)?;
263            buf.push(4);
264            buf.extend_from_slice(&value.to_be_bytes());
265            Ok(())
266        }
267        // BOOLEAN inside an image is the 4-byte form, NOT [1,1]/[0].
268        BindValue::Boolean(value) => {
269            buf.push(4);
270            buf.extend_from_slice(&u32::from(*value).to_be_bytes());
271            Ok(())
272        }
273        BindValue::BinaryDouble(value) => {
274            let bytes = encode_binary_double(*value);
275            image_write_value_bytes(buf, &bytes)
276        }
277        BindValue::BinaryFloat(value) => {
278            let bytes = encode_binary_float(*value as f32);
279            image_write_value_bytes(buf, &bytes)
280        }
281        BindValue::DateTime {
282            year,
283            month,
284            day,
285            hour,
286            minute,
287            second,
288        } => {
289            let bytes = encode_oracle_date(*year, *month, *day, *hour, *minute, *second)?;
290            image_write_value_bytes(buf, &bytes)
291        }
292        BindValue::Timestamp {
293            year,
294            month,
295            day,
296            hour,
297            minute,
298            second,
299            nanosecond,
300            ora_type_num,
301        } => {
302            let bytes = if matches!(*ora_type_num, ORA_TYPE_NUM_TIMESTAMP_TZ) {
303                encode_oracle_timestamp_tz(
304                    *year,
305                    *month,
306                    *day,
307                    *hour,
308                    *minute,
309                    *second,
310                    *nanosecond,
311                )?
312            } else {
313                encode_oracle_timestamp(*year, *month, *day, *hour, *minute, *second, *nanosecond)?
314            };
315            image_write_value_bytes(buf, &bytes)
316        }
317        BindValue::TimestampTz {
318            year,
319            month,
320            day,
321            hour,
322            minute,
323            second,
324            nanosecond,
325            offset_minutes,
326        } => {
327            let bytes = encode_oracle_timestamp_tz_with_offset(
328                *year,
329                *month,
330                *day,
331                *hour,
332                *minute,
333                *second,
334                *nanosecond,
335                *offset_minutes,
336            )?;
337            image_write_value_bytes(buf, &bytes)
338        }
339        BindValue::Lob { locator, .. } => image_write_value_bytes(buf, locator),
340        BindValue::IntervalDS {
341            days,
342            seconds,
343            microseconds,
344        } => {
345            let nanoseconds = microseconds
346                .checked_mul(1000)
347                .ok_or(ProtocolError::TtcDecode(
348                    "INTERVAL DS fractional seconds out of range",
349                ))?;
350            let bytes = encode_interval_ds(*days, *seconds, nanoseconds)?;
351            image_write_value_bytes(buf, &bytes)
352        }
353        BindValue::IntervalYM { years, months } => {
354            let bytes = encode_interval_ym(*years, *months)?;
355            image_write_value_bytes(buf, &bytes)
356        }
357        BindValue::Null => {
358            image_write_null(buf, false);
359            Ok(())
360        }
361        _ => Err(ProtocolError::UnsupportedFeature(
362            "DbObject attribute type not supported for input binding",
363        )),
364    }
365}
366
367pub(crate) fn parse_binary_integer_u32(text: &str) -> Result<u32> {
368    let trimmed = text.trim();
369    let parsed: i64 = trimmed
370        .parse()
371        .map_err(|_| ProtocolError::TtcDecode("invalid BINARY_INTEGER value"))?;
372    Ok(parsed as u32)
373}
374
375/// Frames a fully-packed DbObject pickle `image` into the outgoing data row,
376/// replacing the zero stub used for empty OUT binds. Mirrors
377/// `WriteBuffer.write_dbobject` (reference impl/thin/packet.pyx:842-857). The
378/// `toid` is derived from the type `oid` per `create_new_object` (620-622).
379pub fn write_dbobject_bind(writer: &mut TtcWriter, oid: &[u8], image: &[u8]) -> Result<()> {
380    let mut toid = Vec::with_capacity(4 + oid.len() + TNS_EXTENT_OID.len());
381    toid.extend_from_slice(&[0x00, 0x22, TNS_OBJ_NON_NULL_OID, TNS_OBJ_HAS_EXTENT_OID]);
382    toid.extend_from_slice(oid);
383    toid.extend_from_slice(&TNS_EXTENT_OID);
384    writer.write_bytes_with_two_lengths(Some(&toid))?;
385    writer.write_bytes_with_two_lengths(Some(oid))?;
386    writer.write_ub4(0); // snapshot
387    writer.write_ub4(0); // version
388    writer.write_ub4(u32::try_from(image.len()).map_err(|_| {
389        ProtocolError::InvalidPacketLength {
390            length: image.len(),
391            minimum: 0,
392        }
393    })?);
394    writer.write_ub4(TNS_OBJ_TOP_LEVEL);
395    writer.write_bytes_with_length(image)
396}
397
398pub fn decode_dbobject_text(bytes: &[u8], dbtype_name: &str) -> Result<String> {
399    if matches!(dbtype_name, "DB_TYPE_NCHAR" | "DB_TYPE_NVARCHAR") {
400        let mut chunks = bytes.chunks_exact(2);
401        let units = chunks
402            .by_ref()
403            .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]]))
404            .collect::<Vec<_>>();
405        if !chunks.remainder().is_empty() {
406            return Err(ProtocolError::TtcDecode("invalid DbObject UTF-16 text"));
407        }
408        return String::from_utf16(&units)
409            .map_err(|_| ProtocolError::TtcDecode("invalid DbObject UTF-16 text"));
410    }
411    String::from_utf8(bytes.to_vec())
412        .map_err(|_| ProtocolError::TtcDecode("invalid DbObject UTF-8 text"))
413}
414
415pub fn decode_dbobject_xmltype_text(bytes: &[u8]) -> Result<Option<String>> {
416    let mut reader = DbObjectPackedReader::new(bytes);
417    reader.read_header()?;
418    reader.skip(1)?;
419    let xml_flag = reader.read_u32be()?;
420    if xml_flag & TNS_XML_TYPE_FLAG_SKIP_NEXT_4 != 0 {
421        reader.skip(4)?;
422    }
423    let bytes = reader.read_raw(reader.bytes_left())?;
424    if xml_flag & TNS_XML_TYPE_STRING != 0 {
425        return decode_dbobject_text(bytes, "DB_TYPE_VARCHAR").map(Some);
426    }
427    if xml_flag & TNS_XML_TYPE_LOB != 0 {
428        return Ok(None);
429    }
430    Err(ProtocolError::TtcDecode("unexpected XMLTYPE flag"))
431}
432
433pub fn decode_lob_text(bytes: &[u8], csfrm: u8, locator: Option<&[u8]>) -> Result<String> {
434    let (use_utf16, little_endian) = lob_text_uses_utf16(csfrm, locator);
435    if !use_utf16 {
436        // Validate UTF-8 in place over the borrowed bytes, then allocate the
437        // owned String once. Equivalent to `String::from_utf8(bytes.to_vec())`
438        // but without the temporary Vec that was copied, validated, and moved.
439        return core::str::from_utf8(bytes)
440            .map(str::to_owned)
441            .map_err(|_| ProtocolError::TtcDecode("invalid LOB UTF-8 text"));
442    }
443    // UTF-16 (almost always AL16UTF16 from the server for a multi-byte CLOB).
444    // An odd byte count is malformed; reject it before decoding, matching the
445    // previous `chunks_exact().remainder()` check.
446    if !bytes.len().is_multiple_of(2) {
447        return Err(ProtocolError::TtcDecode("invalid LOB UTF-16 text"));
448    }
449    // LOB text is overwhelmingly ASCII/Latin, where every UTF-16 code unit is a
450    // single ASCII byte (high byte 0, low byte < 0x80 in big-endian; the mirror
451    // in little-endian). Decode those inline — one `String::push` of a 1-byte
452    // char, no intermediate buffer — and only on the first non-ASCII or
453    // surrogate unit hand the *remaining* bytes to the general
454    // `char::decode_utf16` decoder. This skips the old intermediate `Vec<u16>`
455    // (a second large allocation filled by a separate byte-swap pass) for the
456    // common case while staying byte-for-byte identical to the previous
457    // `String::from_utf16` output, including its rejection of lone surrogates.
458    // The byte-index walk means the fallback never rescans what was already
459    // decoded, so the worst case matches the general decoder rather than
460    // doubling it.
461    let mut out = String::with_capacity(bytes.len() / 2);
462    let mut i = 0;
463    while i < bytes.len() {
464        let b0 = bytes[i];
465        let b1 = bytes[i + 1];
466        let is_ascii = if little_endian {
467            b1 == 0 && b0 < 0x80
468        } else {
469            b0 == 0 && b1 < 0x80
470        };
471        if is_ascii {
472            // The non-zero byte is the ASCII code point regardless of endianness.
473            let ascii = if little_endian { b0 } else { b1 };
474            out.push(ascii as char);
475            i += 2;
476        } else {
477            let units = bytes[i..].chunks_exact(2).map(|chunk| {
478                if little_endian {
479                    u16::from_le_bytes([chunk[0], chunk[1]])
480                } else {
481                    u16::from_be_bytes([chunk[0], chunk[1]])
482                }
483            });
484            for unit in char::decode_utf16(units) {
485                let ch = unit.map_err(|_| ProtocolError::TtcDecode("invalid LOB UTF-16 text"))?;
486                out.push(ch);
487            }
488            return Ok(out);
489        }
490    }
491    Ok(out)
492}
493
494pub fn encode_lob_text(value: &str, csfrm: u8, locator: Option<&[u8]>) -> Vec<u8> {
495    let (use_utf16, little_endian) = lob_text_uses_utf16(csfrm, locator);
496    if !use_utf16 {
497        return value.as_bytes().to_vec();
498    }
499    let mut bytes = Vec::with_capacity(value.len() * 2);
500    for unit in value.encode_utf16() {
501        let encoded = if little_endian {
502            unit.to_le_bytes()
503        } else {
504            unit.to_be_bytes()
505        };
506        bytes.extend_from_slice(&encoded);
507    }
508    bytes
509}
510
/// Searches a BFILE locator for its trailing directory/file name pair.
///
/// The expected tail layout is: a big-endian `u16` directory length, the
/// directory bytes, a big-endian `u16` file length, then the file bytes
/// running exactly to the end of the locator. Candidate start offsets are
/// tried from the front; the first one whose two non-zero lengths line up
/// with the end of the locator is taken.
///
/// Returns `None` when no offset matches, or when the first matching
/// candidate's directory or file name is not valid UTF-8.
pub fn decode_bfile_locator_name(locator: &[u8]) -> Option<(String, String)> {
    let total = locator.len();
    // Big-endian u16 at `at`, widened to usize.
    let length_at = |at: usize| usize::from(u16::from_be_bytes([locator[at], locator[at + 1]]));

    for candidate in 0..total.saturating_sub(4) {
        let dir_len = length_at(candidate);
        if dir_len == 0 {
            continue;
        }
        let dir_start = candidate + 2;
        let dir_end = dir_start.checked_add(dir_len)?;
        let file_start = dir_end.checked_add(2)?;
        if file_start > total {
            continue;
        }

        let file_len = length_at(dir_end);
        if file_len == 0 {
            continue;
        }
        // The file name must end exactly at the end of the locator.
        if file_start.checked_add(file_len)? != total {
            continue;
        }

        let dir = std::str::from_utf8(&locator[dir_start..dir_end]).ok()?;
        let file = std::str::from_utf8(&locator[file_start..]).ok()?;
        return Some((dir.to_owned(), file.to_owned()));
    }
    None
}
538
539pub(crate) fn lob_text_uses_utf16(csfrm: u8, locator: Option<&[u8]>) -> (bool, bool) {
540    let use_utf16 = csfrm == CS_FORM_NCHAR
541        || locator
542            .and_then(|locator| locator.get(TNS_LOB_LOC_OFFSET_FLAG_3))
543            .is_some_and(|flags| flags & TNS_LOB_LOC_FLAGS_VAR_LENGTH_CHARSET != 0);
544    let little_endian = locator
545        .and_then(|locator| locator.get(TNS_LOB_LOC_OFFSET_FLAG_4))
546        .is_some_and(|flags| flags & TNS_LOB_LOC_FLAGS_LITTLE_ENDIAN != 0);
547    (use_utf16, little_endian)
548}
549
550pub fn decode_dbobject_binary_float(bytes: &[u8]) -> Result<f32> {
551    let mut bytes: [u8; 4] = bytes
552        .try_into()
553        .map_err(|_| ProtocolError::TtcDecode("invalid DbObject BINARY_FLOAT"))?;
554    if bytes[0] & 0x80 != 0 {
555        bytes[0] &= 0x7f;
556    } else {
557        for byte in &mut bytes {
558            *byte = !*byte;
559        }
560    }
561    Ok(f32::from_bits(u32::from_be_bytes(bytes)))
562}
563
564pub fn decode_dbobject_binary_double(bytes: &[u8]) -> Result<f64> {
565    let mut bytes: [u8; 8] = bytes
566        .try_into()
567        .map_err(|_| ProtocolError::TtcDecode("invalid DbObject BINARY_DOUBLE"))?;
568    if bytes[0] & 0x80 != 0 {
569        bytes[0] &= 0x7f;
570    } else {
571        for byte in &mut bytes {
572            *byte = !*byte;
573        }
574    }
575    Ok(f64::from_bits(u64::from_be_bytes(bytes)))
576}
577
#[cfg(test)]
mod value_bytes_tests {
    use super::*;

    /// Lengths on both sides of the short/long boundary, plus two that need
    /// more than one length byte.
    const LENGTHS: [usize; 6] = [244, 245, 246, 250, 65_535, 70_000];

    /// Encodes values of each length, checks the exact wire bytes, then reads
    /// them back and checks that the reader consumed the whole image.
    #[test]
    fn value_bytes_roundtrip_uses_dbobject_single_u32be_length_format() {
        for len in LENGTHS {
            let value: Vec<u8> = (0..len).map(|i| (i % 251) as u8).collect();

            let mut image = Vec::new();
            image_write_value_bytes(&mut image, &value).expect("encode value bytes");

            let length_prefix: Vec<u8> = if len <= TNS_OBJ_MAX_SHORT_LENGTH {
                vec![len as u8]
            } else {
                let mut prefix = vec![TNS_LONG_LENGTH_INDICATOR];
                prefix.extend_from_slice(&(len as u32).to_be_bytes());
                prefix
            };
            let expected = [length_prefix.as_slice(), value.as_slice()].concat();
            assert_eq!(image, expected, "wire bytes for length {len}");

            if len == 250 {
                assert_eq!(
                    &image[..5],
                    &[TNS_LONG_LENGTH_INDICATOR, 0x00, 0x00, 0x00, 0xfa],
                    "250-byte values use one big-endian u32 length"
                );
            }

            let mut reader = DbObjectPackedReader::new(&image);
            let decoded = reader
                .read_value_bytes()
                .expect("decode value bytes")
                .expect("non-null value bytes");
            assert_eq!(decoded, value, "length {len}");
            assert_eq!(reader.remaining(), 0, "length {len}");
        }
    }
}
617
#[cfg(test)]
mod bounded_reader_tests {
    use super::*;
    use crate::wire::BoundedReader;

    /// Element count declared by the hostile image below (~620M).
    const DECLARED_COUNT: usize = 0x2500_0000;

    // BoundedReader invariant (l2p), DbObject collection family. An image that
    // declares a huge element count through the long-length form but carries
    // no element bytes must not trigger a gigabyte-scale Vec reservation. The
    // decode loop itself lives in the pyshim; what is checked here is that
    // DbObjectPackedReader exposes `remaining()` so the count can be bounded
    // by the buffer.
    #[test]
    fn dbobject_oversized_collection_count_is_bounded_by_remaining() {
        // Long-form length: 0xfe, then ub4 0x25000000, then nothing.
        let image = [TNS_LONG_LENGTH_INDICATOR, 0x25, 0x00, 0x00, 0x00];
        let mut reader = DbObjectPackedReader::new(&image);

        let declared = reader.read_length().expect("length decodes");
        assert_eq!(declared, DECLARED_COUNT);

        // Nothing is left in the buffer and every element needs at least one
        // byte, so the checked variant has to refuse the declared count.
        let checked = reader.alloc_count_checked(declared, 1);
        assert!(
            checked.is_err(),
            "declared count must not exceed the empty remaining buffer"
        );

        // The cap-and-grow variant limits the reservation to remaining() (0).
        let reserved: Vec<u32> = reader.with_capacity_bounded(declared, 1);
        assert_eq!(
            reserved.capacity(),
            0,
            "pre-allocation must be capped by remaining"
        );
    }

    // A small, honest count whose elements fit in the buffer goes through
    // unchanged: valid DbObjects are not rejected.
    #[test]
    fn dbobject_legitimate_collection_count_passes() {
        // Eight unread bytes in the buffer, two declared elements.
        let image = [1u8, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11];
        let reader = DbObjectPackedReader::new(&image);

        let accepted = reader.alloc_count_checked(2, 1).expect("fits");
        assert_eq!(accepted, 2);

        let reserved: Vec<u32> = reader.with_capacity_bounded(2, 1);
        assert_eq!(reserved.capacity(), 2);
    }
}
665
#[cfg(test)]
mod decode_lob_text_tests {
    use super::*;

    /// Builds a 40-byte locator whose flags select the UTF-16 decode path, in
    /// the requested byte order.
    fn utf16_locator(little_endian: bool) -> Vec<u8> {
        let mut locator = vec![0u8; 40];
        locator[TNS_LOB_LOC_OFFSET_FLAG_3] = TNS_LOB_LOC_FLAGS_VAR_LENGTH_CHARSET;
        if little_endian {
            locator[TNS_LOB_LOC_OFFSET_FLAG_4] = TNS_LOB_LOC_FLAGS_LITTLE_ENDIAN;
        }
        locator
    }

    /// Encodes `s` as UTF-16 bytes in the requested byte order.
    fn encode_utf16(s: &str, little_endian: bool) -> Vec<u8> {
        let mut encoded = Vec::with_capacity(s.len() * 2);
        encoded.extend(s.encode_utf16().flat_map(|unit| {
            if little_endian {
                unit.to_le_bytes()
            } else {
                unit.to_be_bytes()
            }
        }));
        encoded
    }

    /// Straightforward decoder (the previous implementation) that the
    /// optimized `decode_lob_text` must agree with.
    fn reference_from_utf16(bytes: &[u8], little_endian: bool) -> Result<String> {
        let pairs = bytes.chunks_exact(2);
        let has_dangling_byte = !pairs.remainder().is_empty();
        let units: Vec<u16> = pairs
            .map(|pair| {
                let pair = [pair[0], pair[1]];
                if little_endian {
                    u16::from_le_bytes(pair)
                } else {
                    u16::from_be_bytes(pair)
                }
            })
            .collect();
        if has_dangling_byte {
            return Err(ProtocolError::TtcDecode("invalid LOB UTF-16 text"));
        }
        String::from_utf16(&units).map_err(|_| ProtocolError::TtcDecode("invalid LOB UTF-16 text"))
    }

    #[test]
    fn utf16_matches_reference_for_varied_text_both_endians() {
        let samples = [
            "",
            "a",
            "the quick brown fox 0123456789",
            "café résumé naïve",           // BMP non-ASCII (Latin-1 supplement)
            "ASCII then 漢字 then more",   // BMP CJK
            "emoji: 😀🎉 mixed with text", // surrogate pairs
            "\u{0000}\u{007f}\u{0080}\u{07ff}\u{0800}\u{ffff}", // boundary code points
        ];
        let cases = samples
            .iter()
            .flat_map(|&sample| [false, true].map(|little_endian| (sample, little_endian)));
        for (sample, little_endian) in cases {
            let bytes = encode_utf16(sample, little_endian);
            let loc = utf16_locator(little_endian);

            let got = decode_lob_text(&bytes, CS_FORM_NCHAR, Some(&loc)).expect("optimized decode");
            let expected = reference_from_utf16(&bytes, little_endian).expect("reference decode");

            assert_eq!(got, expected, "sample {sample:?} le={little_endian}");
            assert_eq!(got, sample);
        }
    }

    #[test]
    fn utf16_odd_length_is_rejected_like_reference() {
        let loc = utf16_locator(false);
        // One complete code unit followed by a single stray byte.
        let bytes = [0x00, 0x41, 0x00];
        let optimized = decode_lob_text(&bytes, CS_FORM_NCHAR, Some(&loc));
        assert!(optimized.is_err());
        let reference = reference_from_utf16(&bytes, false);
        assert!(reference.is_err());
    }

    #[test]
    fn utf16_lone_surrogate_is_rejected_like_reference() {
        let loc = utf16_locator(false);
        // ASCII prefix, an unpaired high surrogate, then more ASCII.
        let high_surrogate = 0xD83Du16.to_be_bytes();
        let bytes = [
            encode_utf16("ok ", false).as_slice(),
            high_surrogate.as_slice(),
            encode_utf16("tail", false).as_slice(),
        ]
        .concat();
        let optimized = decode_lob_text(&bytes, CS_FORM_NCHAR, Some(&loc));
        assert!(optimized.is_err());
        let reference = reference_from_utf16(&bytes, false);
        assert!(reference.is_err());
    }

    #[test]
    fn utf8_path_matches_from_utf8() {
        // csfrm is not NCHAR and the locator has no UTF-16 flag: UTF-8 path.
        let loc = vec![0u8; 40];
        let sample = "café — utf8 path ✓";
        let bytes = sample.as_bytes();

        let got = decode_lob_text(bytes, 1, Some(&loc)).expect("utf8 decode");
        let via_std = String::from_utf8(bytes.to_vec()).expect("sample is valid UTF-8");
        assert_eq!(got, via_std);
        assert_eq!(got, sample);

        // Invalid UTF-8 is an error, as with String::from_utf8.
        let bad = [0x66, 0x6f, 0xff, 0x6f];
        let rejected = decode_lob_text(&bad, 1, Some(&loc));
        assert!(rejected.is_err());
    }
}