Skip to main content

qubit_io/ext/
string_read_ext.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10use std::io::{
11    Read,
12    Result,
13};
14
15use crate::util::read_utf8_payload as read_utf8_payload_impl;
16use crate::{
17    BinaryReadExt,
18    ByteOrder,
19    Leb128ReadExt,
20};
21
22/// Extension methods for reading length-prefixed UTF-8 strings.
23pub trait StringReadExt: Read {
24    /// Reads a UTF-8 payload with an already decoded byte length.
25    ///
26    /// # Parameters
27    /// - `len`: UTF-8 payload length in bytes.
28    /// - `max_len`: Maximum accepted UTF-8 payload length in bytes.
29    ///
30    /// # Returns
31    /// The decoded string.
32    ///
33    /// # Errors
34    /// Returns an I/O error for payload reads, [`std::io::ErrorKind::InvalidData`] when
35    /// `len` exceeds `max_len`, or [`std::io::ErrorKind::InvalidData`] when the payload
36    /// is not valid UTF-8.
37    fn read_utf8_payload(&mut self, len: usize, max_len: usize) -> Result<String>;
38
39    /// Reads a UTF-8 string with an unsigned LEB128 byte-length prefix.
40    ///
41    /// The length prefix is decoded as `usize`, so this format is target-width
42    /// dependent. Prefer `u16` or `u32` length-prefix methods for persistent
43    /// files and cross-platform protocols.
44    ///
45    /// # Parameters
46    /// - `max_len`: Maximum accepted UTF-8 payload length in bytes.
47    ///
48    /// # Returns
49    /// The decoded string.
50    ///
51    /// # Errors
52    /// Returns an I/O error for length or payload reads, [`std::io::ErrorKind::InvalidData`]
53    /// when the encoded length exceeds `max_len`, or [`std::io::ErrorKind::InvalidData`]
54    /// when the payload is not valid UTF-8.
55    fn read_utf8_string_uleb(&mut self, max_len: usize) -> Result<String>;
56
57    /// Reads a UTF-8 string with a canonical unsigned LEB128 byte-length prefix.
58    ///
59    /// The length prefix is decoded as `usize`, so this format is target-width
60    /// dependent. Prefer `u16` or `u32` length-prefix methods for persistent
61    /// files and cross-platform protocols.
62    ///
63    /// # Parameters
64    /// - `max_len`: Maximum accepted UTF-8 payload length in bytes.
65    ///
66    /// # Returns
67    /// The decoded string.
68    ///
69    /// # Errors
70    /// Returns an I/O error for length or payload reads, [`std::io::ErrorKind::InvalidData`]
71    /// when the length prefix is malformed or non-canonical, [`std::io::ErrorKind::InvalidData`]
72    /// when the encoded length exceeds `max_len`, or [`std::io::ErrorKind::InvalidData`]
73    /// when the payload is not valid UTF-8.
74    fn read_utf8_string_uleb_strict(&mut self, max_len: usize) -> Result<String>;
75
76    /// Reads a UTF-8 string with a runtime-order `u16` byte-length prefix.
77    ///
78    /// # Parameters
79    /// - `byte_order`: Byte order used by the length prefix.
80    /// - `max_len`: Maximum accepted UTF-8 payload length in bytes.
81    ///
82    /// # Returns
83    /// The decoded string.
84    ///
85    /// # Errors
86    /// Returns an I/O error for length or payload reads, [`std::io::ErrorKind::InvalidData`]
87    /// when the encoded length exceeds `max_len`, or [`std::io::ErrorKind::InvalidData`]
88    /// when the payload is not valid UTF-8.
89    fn read_utf8_string_u16(&mut self, byte_order: ByteOrder, max_len: usize) -> Result<String>;
90
91    /// Reads a UTF-8 string with a big-endian `u16` byte-length prefix.
92    ///
93    /// # Parameters
94    /// - `max_len`: Maximum accepted UTF-8 payload length in bytes.
95    ///
96    /// # Returns
97    /// The decoded string.
98    ///
99    /// # Errors
100    /// Returns an I/O error for length or payload reads, [`std::io::ErrorKind::InvalidData`]
101    /// when the encoded length exceeds `max_len`, or [`std::io::ErrorKind::InvalidData`]
102    /// when the payload is not valid UTF-8.
103    fn read_utf8_string_u16_be(&mut self, max_len: usize) -> Result<String>;
104
105    /// Reads a UTF-8 string with a little-endian `u16` byte-length prefix.
106    ///
107    /// # Parameters
108    /// - `max_len`: Maximum accepted UTF-8 payload length in bytes.
109    ///
110    /// # Returns
111    /// The decoded string.
112    ///
113    /// # Errors
114    /// Returns an I/O error for length or payload reads, [`std::io::ErrorKind::InvalidData`]
115    /// when the encoded length exceeds `max_len`, or [`std::io::ErrorKind::InvalidData`]
116    /// when the payload is not valid UTF-8.
117    fn read_utf8_string_u16_le(&mut self, max_len: usize) -> Result<String>;
118
119    /// Reads a UTF-8 string with a runtime-order `u32` byte-length prefix.
120    ///
121    /// # Parameters
122    /// - `byte_order`: Byte order used by the length prefix.
123    /// - `max_len`: Maximum accepted UTF-8 payload length in bytes.
124    ///
125    /// # Returns
126    /// The decoded string.
127    ///
128    /// # Errors
129    /// Returns an I/O error for length or payload reads, [`std::io::ErrorKind::InvalidData`]
130    /// when the encoded length exceeds `max_len`, or [`std::io::ErrorKind::InvalidData`]
131    /// when the payload is not valid UTF-8.
132    fn read_utf8_string_u32(&mut self, byte_order: ByteOrder, max_len: usize) -> Result<String>;
133
134    /// Reads a UTF-8 string with a big-endian `u32` byte-length prefix.
135    ///
136    /// # Parameters
137    /// - `max_len`: Maximum accepted UTF-8 payload length in bytes.
138    ///
139    /// # Returns
140    /// The decoded string.
141    ///
142    /// # Errors
143    /// Returns an I/O error for length or payload reads, [`std::io::ErrorKind::InvalidData`]
144    /// when the encoded length exceeds `max_len`, or [`std::io::ErrorKind::InvalidData`]
145    /// when the payload is not valid UTF-8.
146    fn read_utf8_string_u32_be(&mut self, max_len: usize) -> Result<String>;
147
148    /// Reads a UTF-8 string with a little-endian `u32` byte-length prefix.
149    ///
150    /// # Parameters
151    /// - `max_len`: Maximum accepted UTF-8 payload length in bytes.
152    ///
153    /// # Returns
154    /// The decoded string.
155    ///
156    /// # Errors
157    /// Returns an I/O error for length or payload reads, [`std::io::ErrorKind::InvalidData`]
158    /// when the encoded length exceeds `max_len`, or [`std::io::ErrorKind::InvalidData`]
159    /// when the payload is not valid UTF-8.
160    fn read_utf8_string_u32_le(&mut self, max_len: usize) -> Result<String>;
161}
162
163impl<T> StringReadExt for T
164where
165    T: Read + ?Sized,
166{
167    #[inline]
168    fn read_utf8_payload(&mut self, len: usize, max_len: usize) -> Result<String> {
169        read_utf8_payload_impl(self, len, max_len)
170    }
171
172    #[inline]
173    fn read_utf8_string_uleb(&mut self, max_len: usize) -> Result<String> {
174        let len = self.read_uleb_usize()?;
175        read_utf8_payload_impl(self, len, max_len)
176    }
177
178    #[inline]
179    fn read_utf8_string_uleb_strict(&mut self, max_len: usize) -> Result<String> {
180        let len = self.read_uleb_usize_strict()?;
181        read_utf8_payload_impl(self, len, max_len)
182    }
183
184    #[inline]
185    fn read_utf8_string_u16(&mut self, byte_order: ByteOrder, max_len: usize) -> Result<String> {
186        let len = usize::from(self.read_u16(byte_order)?);
187        read_utf8_payload_impl(self, len, max_len)
188    }
189
190    #[inline]
191    fn read_utf8_string_u16_be(&mut self, max_len: usize) -> Result<String> {
192        let len = self.read_u16_be()? as usize;
193        read_utf8_payload_impl(self, len, max_len)
194    }
195
196    #[inline]
197    fn read_utf8_string_u16_le(&mut self, max_len: usize) -> Result<String> {
198        let len = self.read_u16_le()? as usize;
199        read_utf8_payload_impl(self, len, max_len)
200    }
201
202    #[inline]
203    fn read_utf8_string_u32(&mut self, byte_order: ByteOrder, max_len: usize) -> Result<String> {
204        let len = self.read_u32(byte_order)? as usize;
205        read_utf8_payload_impl(self, len, max_len)
206    }
207
208    #[inline]
209    fn read_utf8_string_u32_be(&mut self, max_len: usize) -> Result<String> {
210        let len = self.read_u32_be()? as usize;
211        read_utf8_payload_impl(self, len, max_len)
212    }
213
214    #[inline]
215    fn read_utf8_string_u32_le(&mut self, max_len: usize) -> Result<String> {
216        let len = self.read_u32_le()? as usize;
217        read_utf8_payload_impl(self, len, max_len)
218    }
219}