async_zip/
string.rs

1// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
2// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
3
4use crate::error::{Result, ZipError};
5
/// A string encoding supported by this crate.
///
/// The type is a plain, field-less enum, so it is cheap to copy, can be compared with `==` and can be used as a key
/// in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StringEncoding {
    /// The bytes are valid UTF-8 and may be viewed as a `str`.
    Utf8,
    /// The bytes are kept exactly as provided and are not treated as UTF-8 text.
    Raw,
}

/// A string wrapper for handling different encodings.
///
/// Two values compare equal only when their encoding, their raw bytes and their optional alternative bytes are all
/// equal; the same bytes tagged with different encodings are therefore distinct values.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ZipString {
    /// How `raw` should be interpreted.
    encoding: StringEncoding,
    /// The primary bytes of the string.
    raw: Vec<u8>,
    /// Optional second representation of the same string in native MBCS encoding.
    alternative: Option<Vec<u8>>,
}
20
21impl ZipString {
22    /// Constructs a new encoded string from its raw bytes and its encoding type.
23    ///
24    /// # Note
25    /// If the provided encoding is [`StringEncoding::Utf8`] but the raw bytes are not valid UTF-8 (ie. a call to
26    /// `std::str::from_utf8()` fails), the encoding is defaulted back to [`StringEncoding::Raw`].
27    pub fn new(raw: Vec<u8>, mut encoding: StringEncoding) -> Self {
28        if let StringEncoding::Utf8 = encoding {
29            if std::str::from_utf8(&raw).is_err() {
30                encoding = StringEncoding::Raw;
31            }
32        }
33
34        Self { encoding, raw, alternative: None }
35    }
36
37    /// Constructs a new encoded string from utf-8 data, with an alternative in native MBCS encoding.
38    pub fn new_with_alternative(utf8: String, alternative: Vec<u8>) -> Self {
39        Self { encoding: StringEncoding::Utf8, raw: utf8.into_bytes(), alternative: Some(alternative) }
40    }
41
42    /// Returns the raw bytes for this string.
43    pub fn as_bytes(&self) -> &[u8] {
44        &self.raw
45    }
46
47    /// Returns the encoding type for this string.
48    pub fn encoding(&self) -> StringEncoding {
49        self.encoding
50    }
51
52    /// Returns the alternative bytes (in native MBCS encoding) for this string.
53    pub fn alternative(&self) -> Option<&[u8]> {
54        self.alternative.as_deref()
55    }
56
57    /// Returns the raw bytes converted into a string slice.
58    ///
59    /// # Note
60    /// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`].
61    pub fn as_str(&self) -> Result<&str> {
62        if !matches!(self.encoding, StringEncoding::Utf8) {
63            return Err(ZipError::StringNotUtf8);
64        }
65
66        // SAFETY:
67        // "The bytes passed in must be valid UTF-8.'
68        //
69        // This function will error if self.encoding is not StringEncoding::Utf8.
70        //
71        // self.encoding is only ever StringEncoding::Utf8 if this variant was provided to the constructor AND the
72        // call to `std::str::from_utf8()` within the constructor succeeded. Mutable access to the inner vector is
73        // never given and no method implemented on this type mutates the inner vector.
74
75        Ok(unsafe { std::str::from_utf8_unchecked(&self.raw) })
76    }
77
78    /// Returns the raw bytes converted to an owned string.
79    ///
80    /// # Note
81    /// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`].
82    pub fn into_string(self) -> Result<String> {
83        if !matches!(self.encoding, StringEncoding::Utf8) {
84            return Err(ZipError::StringNotUtf8);
85        }
86
87        // SAFETY: See above.
88        Ok(unsafe { String::from_utf8_unchecked(self.raw) })
89    }
90
91    /// Returns the alternative bytes (in native MBCS encoding) converted to the owned.
92    pub fn into_alternative(self) -> Option<Vec<u8>> {
93        self.alternative
94    }
95
96    /// Returns whether this string is encoded as utf-8 without an alternative.
97    pub fn is_utf8_without_alternative(&self) -> bool {
98        matches!(self.encoding, StringEncoding::Utf8) && self.alternative.is_none()
99    }
100}
101
102impl From<String> for ZipString {
103    fn from(value: String) -> Self {
104        Self { encoding: StringEncoding::Utf8, raw: value.into_bytes(), alternative: None }
105    }
106}
107
108impl From<&str> for ZipString {
109    fn from(value: &str) -> Self {
110        Self { encoding: StringEncoding::Utf8, raw: value.as_bytes().to_vec(), alternative: None }
111    }
112}