Skip to main content

tds_protocol/
codec.rs

1//! Codec utilities for TDS protocol encoding and decoding.
2//!
3//! This module provides low-level encoding and decoding utilities used
4//! throughout the TDS protocol implementation.
5
6use bytes::{Buf, BufMut};
7
8use crate::prelude::*;
9
10/// Read a length-prefixed UTF-16LE string.
11///
12/// The format is: 1-byte length (in characters) followed by UTF-16LE bytes.
13pub fn read_b_varchar(src: &mut impl Buf) -> Option<String> {
14    if src.remaining() < 1 {
15        return None;
16    }
17    let len = src.get_u8() as usize;
18    read_utf16_string(src, len)
19}
20
21/// Read a length-prefixed UTF-16LE string with 2-byte length.
22///
23/// The format is: 2-byte length (in characters) followed by UTF-16LE bytes.
24pub fn read_us_varchar(src: &mut impl Buf) -> Option<String> {
25    if src.remaining() < 2 {
26        return None;
27    }
28    let len = src.get_u16_le() as usize;
29    read_utf16_string(src, len)
30}
31
32/// Read a UTF-16LE string of specified character length.
33///
34/// Malformed UCS-2 (e.g. an unpaired surrogate) is decoded lossily, with each
35/// invalid unit replaced by U+FFFD, rather than failing. This keeps `None`
36/// meaning unambiguously "not enough bytes in the buffer" so callers do not
37/// conflate a decode failure with end-of-input.
38pub fn read_utf16_string(src: &mut impl Buf, char_count: usize) -> Option<String> {
39    let byte_count = char_count * 2;
40    if src.remaining() < byte_count {
41        return None;
42    }
43
44    let mut chars = Vec::with_capacity(char_count);
45    for _ in 0..char_count {
46        chars.push(src.get_u16_le());
47    }
48
49    Some(String::from_utf16_lossy(&chars))
50}
51
52/// Write a length-prefixed UTF-16LE string (1-byte length).
53pub fn write_b_varchar(dst: &mut impl BufMut, s: &str) {
54    let chars: Vec<u16> = s.encode_utf16().collect();
55    let len = chars.len().min(255) as u8;
56    dst.put_u8(len);
57    for &c in &chars[..len as usize] {
58        dst.put_u16_le(c);
59    }
60}
61
62/// Write a length-prefixed UTF-16LE string (2-byte length).
63pub fn write_us_varchar(dst: &mut impl BufMut, s: &str) {
64    let chars: Vec<u16> = s.encode_utf16().collect();
65    let len = chars.len().min(65535) as u16;
66    dst.put_u16_le(len);
67    for &c in &chars[..len as usize] {
68        dst.put_u16_le(c);
69    }
70}
71
72/// Write a UTF-16LE string without length prefix.
73pub fn write_utf16_string(dst: &mut impl BufMut, s: &str) {
74    for c in s.encode_utf16() {
75        dst.put_u16_le(c);
76    }
77}
78
79/// Read a null-terminated ASCII string.
80pub fn read_null_terminated_ascii(src: &mut impl Buf) -> Option<String> {
81    let mut bytes = Vec::new();
82    while src.has_remaining() {
83        let b = src.get_u8();
84        if b == 0 {
85            break;
86        }
87        bytes.push(b);
88    }
89    String::from_utf8(bytes).ok()
90}
91
/// Return how many bytes `s` occupies once encoded as UTF-16.
///
/// Each `char` contributes one or two 16-bit code units, and every code unit
/// is two bytes wide.
#[must_use]
pub fn utf16_byte_len(s: &str) -> usize {
    let code_units: usize = s.chars().map(char::len_utf16).sum();
    code_units * 2
}
97
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use bytes::BytesMut;

    /// Mixed ASCII and CJK text survives a write/read cycle through the
    /// 1-byte-length encoding.
    #[test]
    fn test_b_varchar_roundtrip() {
        let original = "Hello, 世界!";

        let mut encoded = BytesMut::new();
        write_b_varchar(&mut encoded, original);
        let mut reader = encoded.freeze();

        assert_eq!(read_b_varchar(&mut reader).unwrap(), original);
    }

    /// Mixed ASCII and Greek text survives a write/read cycle through the
    /// 2-byte-length encoding.
    #[test]
    fn test_us_varchar_roundtrip() {
        let original = "Test string with Unicode: αβγ";

        let mut encoded = BytesMut::new();
        write_us_varchar(&mut encoded, original);
        let mut reader = encoded.freeze();

        assert_eq!(read_us_varchar(&mut reader).unwrap(), original);
    }

    /// ASCII and BMP CJK characters each take exactly two bytes in UTF-16.
    #[test]
    fn test_utf16_byte_len() {
        let cases = [("Hello", 10), ("世界", 4)];
        for &(input, expected) in &cases {
            assert_eq!(utf16_byte_len(input), expected);
        }
    }

    /// An unpaired surrogate is malformed UCS-2. It must decode lossily (to
    /// U+FFFD) rather than returning `None`, so the result is distinguishable
    /// from a short buffer (#276).
    #[test]
    fn test_utf16_unpaired_surrogate_is_lossy_not_none() {
        // 'A', a lone high surrogate (no following low surrogate), then 'B'.
        let units: [u16; 3] = [0x0041, 0xD800, 0x0042];

        let mut encoded = BytesMut::new();
        for &unit in &units {
            encoded.put_u16_le(unit);
        }
        let mut reader = encoded.freeze();

        let decoded = read_utf16_string(&mut reader, units.len())
            .expect("malformed UCS-2 must decode lossily, not return None");
        assert_eq!(decoded, "A\u{FFFD}B");
    }

    /// A short buffer is the only remaining `None` case.
    #[test]
    fn test_utf16_short_buffer_is_none() {
        let mut encoded = BytesMut::new();
        encoded.put_u16_le(0x0041); // one code unit available, two requested
        let mut reader = encoded.freeze();

        assert_eq!(read_utf16_string(&mut reader, 2), None);
    }
}