Skip to main content

codas/types/
text.rs

1//! UTF-8 encoded text data types.
2use core::{
3    fmt::{Debug, Display},
4    hash::Hash,
5    ops::Deref,
6};
7
8use alloc::borrow::ToOwned;
9use alloc::string::String;
10use alloc::string::ToString;
11use alloc::sync::Arc;
12
13use crate::codec::{
14    CodecError, DataHeader, Decodable, Encodable, Format, UnexpectedDataFormatSnafu,
15    WritesEncodable,
16};
17
/// UTF-8 encoded text data.
///
/// A `Text` dereferences to `str`, so it may be
/// used anywhere a `&str` works. To get a _mutable_
/// reference to the text (i.e., a `&mut String`),
/// use [`Self::to_mut`].
pub enum Text {
    /// Statically allocated text (`&'static str`).
    Static(&'static str),

    /// Dynamic heap-allocated text.
    ///
    /// The string lives behind an [`Arc`], so several
    /// `Text` values may point at the same allocation.
    Dynamic(Arc<String>),
}

impl Text {
    /// Empty text.
    pub const EMPTY: Text = Text::Static("");

    /// Returns a [`Text::Static`] containing
    /// the provided string.
    pub const fn from(s: &'static str) -> Self {
        Text::Static(s)
    }

    /// Returns a string slice containing the entire text.
    pub fn as_str(&self) -> &str {
        match self {
            Text::Static(t) => t,
            Text::Dynamic(t) => t.as_str(),
        }
    }

    /// Returns a mutable reference to this text.
    ///
    /// A [`Text::Static`] is first copied into a new
    /// heap-allocated string. A [`Text::Dynamic`] whose
    /// allocation is shared with other owners is cloned
    /// (via [`Arc::make_mut`]) so that the mutation is
    /// not visible to them; an unshared one is borrowed
    /// in place. Use [`Self::to_cleared_mut`] when wanting
    /// to reuse the same allocation for _new_ text.
    pub fn to_mut(&mut self) -> &mut String {
        match self {
            Text::Static(t) => {
                // Promote to an owned string, then take
                // the `Dynamic` branch below.
                *self = Text::Dynamic(t.to_string().into());
                self.to_mut()
            }
            Text::Dynamic(t) => Arc::make_mut(t),
        }
    }

    /// Returns a mutable reference to this text,
    /// clearing the contents and reserving _at least_
    /// `capacity` bytes in the text buffer.
    ///
    /// Use this function instead of [`Self::to_mut`]
    /// to reuse this text for a _new_ string, potentially
    /// avoiding unneeded allocations.
    ///
    /// The existing allocation is reused only when this
    /// text is the sole owner of it. Static text, and
    /// dynamic text shared with other owners, is replaced
    /// by a fresh, empty string; other owners keep the
    /// old contents.
    pub fn to_cleared_mut(&mut self, capacity: usize) -> &mut String {
        match self {
            Text::Static(_) => {
                *self = Text::Dynamic(Arc::new(String::with_capacity(capacity)));
            }
            Text::Dynamic(t) => match Arc::get_mut(t) {
                // Sole owner: recycle the buffer in place.
                Some(owned) => {
                    owned.clear();
                    owned.reserve(capacity);
                }

                // Shared: detach instead of falling through to
                // `to_mut`, which would clone the old contents
                // and hand them back uncleared.
                None => {
                    *self = Text::Dynamic(Arc::new(String::with_capacity(capacity)));
                }
            },
        }

        // `self` is now an unshared `Dynamic`, so this
        // borrows the string without copying it.
        self.to_mut()
    }
}
92
93// Codec Traits ///////////////////////
94
95impl Encodable for Text {
96    /// Encoded as a \[[`u8`]\] containing the
97    /// result of [`str::as_bytes`].
98    const FORMAT: Format = <[u8]>::FORMAT;
99
100    fn encode(&self, writer: &mut (impl WritesEncodable + ?Sized)) -> Result<(), CodecError> {
101        self.as_bytes().encode(writer)
102    }
103
104    fn encode_header(
105        &self,
106        writer: &mut (impl WritesEncodable + ?Sized),
107    ) -> Result<(), CodecError> {
108        self.as_bytes().encode_header(writer)
109    }
110}
111
112impl Decodable for Text {
113    fn decode(
114        &mut self,
115        reader: &mut (impl crate::codec::ReadsDecodable + ?Sized),
116        header: Option<crate::codec::DataHeader>,
117    ) -> Result<(), crate::codec::CodecError> {
118        let header = Self::ensure_header(header, &[0])?;
119
120        // Text is always blob_size=1, data_fields=0.
121        if header.format.blob_size != 1 || header.format.data_fields != 0 {
122            return UnexpectedDataFormatSnafu {
123                expected: Self::FORMAT,
124                actual: Some(header),
125            }
126            .fail();
127        }
128
129        match self {
130            Text::Static(_) => {
131                // Reference the empty text for empty strings.
132                if header.count == 0 {
133                    *self = Text::EMPTY;
134
135                // Dynamically allocate for non-empty strings.
136                } else {
137                    let mut string = String::with_capacity(header.count as usize);
138                    try_decode_string(reader, header, &mut string)?;
139                    *self = Text::Dynamic(string.into());
140                }
141            }
142            Text::Dynamic(text) => {
143                // Clear the text contents, or reference
144                // the empty text, for empty strings.
145                if header.count == 0 {
146                    if let Some(text) = Arc::get_mut(text) {
147                        text.clear();
148                    } else {
149                        *self = Text::EMPTY;
150                    }
151                } else {
152                    match Arc::get_mut(text) {
153                        // Read data directly into the string buffer if
154                        // it's not shared.
155                        Some(text) => try_decode_string(reader, header, text)?,
156
157                        // Dynamically allocate a new string.
158                        None => {
159                            let mut string = String::with_capacity(header.count as usize);
160                            try_decode_string(reader, header, &mut string)?;
161                            *self = Text::Dynamic(string.into());
162                        }
163                    }
164                }
165            }
166        }
167
168        Ok(())
169    }
170}
171
172/// Tries to decode the remaining string data
173/// from `reader` for `header` into `string`.
174///
175/// If decoding fails for any reason, the returned
176/// `string` will be empty.
177fn try_decode_string(
178    reader: &mut (impl crate::codec::ReadsDecodable + ?Sized),
179    header: DataHeader,
180    string: &mut String,
181) -> Result<(), CodecError> {
182    unsafe {
183        // Truncate and pad the bytes to fit the new text.
184        let bytes = string.as_mut_vec();
185        bytes.truncate(header.count as usize);
186        bytes.reserve(header.count as usize);
187        while bytes.len() < header.count as usize {
188            bytes.push(0u8);
189        }
190
191        // Read in the raw bytes.
192        if let Err(e) = reader.read_exact(bytes) {
193            bytes.clear();
194            return Err(e);
195        }
196
197        // Validate the bytes.
198        if alloc::str::from_utf8(bytes).is_err() {
199            bytes.clear();
200
201            // FIXME: If a string contains malformed UTF-8 bytes,
202            //        should decoding fail? Or should the string
203            //        be silently interpreted as empty data, which
204            //        could lead to data loss?
205        }
206    }
207
208    Ok(())
209}
210
211// Common Traits //////////////////////
212impl Clone for Text {
213    fn clone(&self) -> Self {
214        match self {
215            Self::Static(text) => Self::Static(text),
216            Self::Dynamic(text) => Self::Dynamic(text.clone()),
217        }
218    }
219}
220
221impl Default for Text {
222    fn default() -> Self {
223        Self::EMPTY
224    }
225}
226
227impl Deref for Text {
228    type Target = str;
229
230    fn deref(&self) -> &Self::Target {
231        match self {
232            Text::Static(t) => t,
233            Text::Dynamic(t) => t,
234        }
235    }
236}
237
238impl From<&str> for Text {
239    fn from(value: &str) -> Self {
240        Self::Dynamic(Arc::new(value.to_owned()))
241    }
242}
243
244impl From<String> for Text {
245    fn from(value: String) -> Self {
246        Self::Dynamic(value.into())
247    }
248}
249
250// Self-equivalency traits ////////////
251
252impl PartialEq for Text {
253    fn eq(&self, other: &Self) -> bool {
254        self.deref() == other.deref()
255    }
256}
257
258impl Eq for Text {}
259
260impl PartialOrd for Text {
261    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
262        Some(self.cmp(other))
263    }
264}
265
266impl Ord for Text {
267    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
268        self.deref().cmp(other.deref())
269    }
270}
271
272impl Hash for Text {
273    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
274        self.deref().hash(state)
275    }
276}
277
278// Other equivalency traits ///////////
279
280impl PartialEq<str> for Text {
281    fn eq(&self, other: &str) -> bool {
282        self.deref() == other
283    }
284}
285
286impl PartialEq<&str> for Text {
287    fn eq(&self, other: &&str) -> bool {
288        self.deref() == *other
289    }
290}
291
292impl PartialEq<Text> for &str {
293    fn eq(&self, other: &Text) -> bool {
294        *self == other.deref()
295    }
296}
297
298impl PartialEq<Text> for str {
299    fn eq(&self, other: &Text) -> bool {
300        self == other.deref()
301    }
302}
303
304impl PartialEq<String> for Text {
305    fn eq(&self, other: &String) -> bool {
306        self.deref() == other
307    }
308}
309
310impl PartialEq<Text> for String {
311    fn eq(&self, other: &Text) -> bool {
312        self == other.deref()
313    }
314}
315
316// Formatting traits //////////////////
317
318impl Display for Text {
319    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
320        <str as Display>::fmt(self, f)
321    }
322}
323
324impl Debug for Text {
325    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
326        <str as Debug>::fmt(self, f)
327    }
328}
329
330// Serde traits ///////////////////////
331
#[cfg(feature = "serde")]
impl serde::Serialize for Text {
    /// Serializes the text as a plain string,
    /// regardless of which variant holds it.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(self.as_str())
    }
}
344
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Text {
    /// Deserializes an owned [`String`] and wraps
    /// it in a [`Text::Dynamic`].
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let owned = <String as serde::Deserialize<'de>>::deserialize(deserializer)?;
        Ok(Self::Dynamic(Arc::new(owned)))
    }
}
354
#[cfg(test)]
mod test {
    use crate::codec::ReadsDecodable;

    use super::*;

    /// `Text` compares symmetrically against
    /// `String` and `&str`, for both equal and
    /// unequal contents.
    #[test]
    fn test_text_string_eq() {
        // Equal contents, ASCII and non-ASCII.
        for sample in ["Hello", "World ♥️"] {
            let text = Text::from(sample);
            let string = String::from(sample);
            assert_eq!(text, string);
            assert_eq!(string, text);
            assert_eq!(text, string.as_str());
            assert_eq!(string.as_str(), text);
        }

        // Different contents.
        let text = Text::from("We Like Text");
        let string = String::from("Because Strings are Worse?");
        assert_ne!(text, string);
        assert_ne!(string, text);
        assert_ne!(text, string.as_str());
        assert_ne!(string.as_str(), text);
    }

    /// Text survives an encode/decode round trip.
    #[test]
    fn test_text_codec() {
        let original = Text::from("Hello, world!");

        let mut buffer = vec![];
        buffer.write_data(&original).expect("encoded");

        let roundtripped: Text = buffer.as_slice().read_data().expect("decoded");
        assert_eq!(original, roundtripped);
    }
}
393}