Skip to main content

qubit_io/ext/
string_read_ext.rs

/*******************************************************************************
 *
 *    Copyright (c) 2026 Haixing Hu.
 *
 *    SPDX-License-Identifier: Apache-2.0
 *
 *    Licensed under the Apache License, Version 2.0.
 *
 ******************************************************************************/
10use std::io::{
11    Error,
12    ErrorKind,
13    Read,
14    Result,
15};
16use std::string::FromUtf8Error;
17
18use crate::{
19    BinaryReadExt,
20    Leb128ReadExt,
21};
22
/// Length-prefixed UTF-8 string decoding for [`Read`] implementors.
///
/// Every method reads a byte-length prefix in the encoding named by the
/// method, then the UTF-8 payload that the prefix announces.
pub trait StringReadExt: Read {
    /// Decodes a string whose byte length is given by an unsigned LEB128 prefix.
    ///
    /// # Parameters
    /// - `max_len`: Upper bound, in bytes, on the UTF-8 payload that is accepted.
    ///
    /// # Returns
    /// The payload decoded as an owned [`String`].
    ///
    /// # Errors
    /// - Any I/O error raised while reading the length prefix or the payload.
    /// - [`ErrorKind::InvalidData`] if the encoded length is greater than
    ///   `max_len`.
    /// - [`ErrorKind::InvalidData`] if the payload bytes are not valid UTF-8.
    fn read_utf8_string_uleb(&mut self, max_len: usize) -> Result<String>;

    /// Decodes a string whose byte length is given by a canonical unsigned
    /// LEB128 prefix.
    ///
    /// # Parameters
    /// - `max_len`: Upper bound, in bytes, on the UTF-8 payload that is accepted.
    ///
    /// # Returns
    /// The payload decoded as an owned [`String`].
    ///
    /// # Errors
    /// - Any I/O error raised while reading the length prefix or the payload.
    /// - [`ErrorKind::InvalidData`] if the length prefix is malformed or
    ///   non-canonical.
    /// - [`ErrorKind::InvalidData`] if the encoded length is greater than
    ///   `max_len`.
    /// - [`ErrorKind::InvalidData`] if the payload bytes are not valid UTF-8.
    fn read_utf8_string_uleb_strict(&mut self, max_len: usize) -> Result<String>;

    /// Decodes a string whose byte length is given by a big-endian `u16` prefix.
    ///
    /// # Parameters
    /// - `max_len`: Upper bound, in bytes, on the UTF-8 payload that is accepted.
    ///
    /// # Returns
    /// The payload decoded as an owned [`String`].
    ///
    /// # Errors
    /// - Any I/O error raised while reading the length prefix or the payload.
    /// - [`ErrorKind::InvalidData`] if the encoded length is greater than
    ///   `max_len`.
    /// - [`ErrorKind::InvalidData`] if the payload bytes are not valid UTF-8.
    fn read_utf8_string_u16_be(&mut self, max_len: usize) -> Result<String>;

    /// Decodes a string whose byte length is given by a little-endian `u16`
    /// prefix.
    ///
    /// # Parameters
    /// - `max_len`: Upper bound, in bytes, on the UTF-8 payload that is accepted.
    ///
    /// # Returns
    /// The payload decoded as an owned [`String`].
    ///
    /// # Errors
    /// - Any I/O error raised while reading the length prefix or the payload.
    /// - [`ErrorKind::InvalidData`] if the encoded length is greater than
    ///   `max_len`.
    /// - [`ErrorKind::InvalidData`] if the payload bytes are not valid UTF-8.
    fn read_utf8_string_u16_le(&mut self, max_len: usize) -> Result<String>;

    /// Decodes a string whose byte length is given by a big-endian `u32` prefix.
    ///
    /// # Parameters
    /// - `max_len`: Upper bound, in bytes, on the UTF-8 payload that is accepted.
    ///
    /// # Returns
    /// The payload decoded as an owned [`String`].
    ///
    /// # Errors
    /// - Any I/O error raised while reading the length prefix or the payload.
    /// - [`ErrorKind::InvalidData`] if the encoded length is greater than
    ///   `max_len`.
    /// - [`ErrorKind::InvalidData`] if the payload bytes are not valid UTF-8.
    fn read_utf8_string_u32_be(&mut self, max_len: usize) -> Result<String>;

    /// Decodes a string whose byte length is given by a little-endian `u32`
    /// prefix.
    ///
    /// # Parameters
    /// - `max_len`: Upper bound, in bytes, on the UTF-8 payload that is accepted.
    ///
    /// # Returns
    /// The payload decoded as an owned [`String`].
    ///
    /// # Errors
    /// - Any I/O error raised while reading the length prefix or the payload.
    /// - [`ErrorKind::InvalidData`] if the encoded length is greater than
    ///   `max_len`.
    /// - [`ErrorKind::InvalidData`] if the payload bytes are not valid UTF-8.
    fn read_utf8_string_u32_le(&mut self, max_len: usize) -> Result<String>;
}
110
111impl<T> StringReadExt for T
112where
113    T: Read + ?Sized,
114{
115    #[inline]
116    fn read_utf8_string_uleb(&mut self, max_len: usize) -> Result<String> {
117        read_utf8_string_uleb_impl(self, max_len)
118    }
119
120    #[inline]
121    fn read_utf8_string_uleb_strict(&mut self, max_len: usize) -> Result<String> {
122        read_utf8_string_uleb_strict_impl(self, max_len)
123    }
124
125    #[inline]
126    fn read_utf8_string_u16_be(&mut self, max_len: usize) -> Result<String> {
127        read_utf8_string_u16_be_impl(self, max_len)
128    }
129
130    #[inline]
131    fn read_utf8_string_u16_le(&mut self, max_len: usize) -> Result<String> {
132        read_utf8_string_u16_le_impl(self, max_len)
133    }
134
135    #[inline]
136    fn read_utf8_string_u32_be(&mut self, max_len: usize) -> Result<String> {
137        read_utf8_string_u32_be_impl(self, max_len)
138    }
139
140    #[inline]
141    fn read_utf8_string_u32_le(&mut self, max_len: usize) -> Result<String> {
142        read_utf8_string_u32_le_impl(self, max_len)
143    }
144}
145
146fn read_utf8_string_uleb_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
147where
148    T: Read + ?Sized,
149{
150    let len = reader.read_uleb_usize()?;
151    read_utf8_payload(reader, len, max_len)
152}
153
154fn read_utf8_string_uleb_strict_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
155where
156    T: Read + ?Sized,
157{
158    let len = reader.read_uleb_usize_strict()?;
159    read_utf8_payload(reader, len, max_len)
160}
161
162fn read_utf8_string_u16_be_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
163where
164    T: Read + ?Sized,
165{
166    let len = reader.read_u16_be()? as usize;
167    read_utf8_payload(reader, len, max_len)
168}
169
170fn read_utf8_string_u16_le_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
171where
172    T: Read + ?Sized,
173{
174    let len = reader.read_u16_le()? as usize;
175    read_utf8_payload(reader, len, max_len)
176}
177
178fn read_utf8_string_u32_be_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
179where
180    T: Read + ?Sized,
181{
182    let len = reader.read_u32_be()? as usize;
183    read_utf8_payload(reader, len, max_len)
184}
185
186fn read_utf8_string_u32_le_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
187where
188    T: Read + ?Sized,
189{
190    let len = reader.read_u32_le()? as usize;
191    read_utf8_payload(reader, len, max_len)
192}
193
194fn read_utf8_payload<T>(reader: &mut T, len: usize, max_len: usize) -> Result<String>
195where
196    T: Read + ?Sized,
197{
198    if len > max_len {
199        return Err(length_exceeded_error(len, max_len));
200    }
201    let mut bytes = vec![0; len];
202    reader.read_exact(&mut bytes)?;
203    String::from_utf8(bytes).map_err(invalid_utf8_error)
204}
205
/// Builds the [`ErrorKind::InvalidData`] error used when a length prefix
/// (`len`) is larger than the caller-supplied limit (`max_len`).
fn length_exceeded_error(len: usize, max_len: usize) -> Error {
    let message = format!("string length {len} exceeds maximum length of {max_len} bytes");
    Error::new(ErrorKind::InvalidData, message)
}
212
/// Converts a UTF-8 decoding failure into an [`ErrorKind::InvalidData`] I/O
/// error whose message embeds the display text of the original `error`.
fn invalid_utf8_error(error: FromUtf8Error) -> Error {
    let message = format!("length-prefixed string is not valid UTF-8: {error}");
    Error::new(ErrorKind::InvalidData, message)
}