async_zip/string.rs
1// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
2// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
3
4use crate::error::{Result, ZipError};
5
/// A string encoding supported by this crate.
///
/// Implements [`PartialEq`] and [`Eq`] so that two encodings can be compared directly with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringEncoding {
    /// The bytes are UTF-8.
    Utf8,
    /// The bytes are not known to be valid UTF-8 and are only meaningful as raw bytes.
    Raw,
}
12
/// A string wrapper for handling different encodings.
///
/// Stores the bytes of the string together with the encoding they are tagged with, plus an optional alternative
/// byte representation of the same string.
#[derive(Debug, Clone)]
pub struct ZipString {
    /// The encoding that `raw` is tagged with.
    encoding: StringEncoding,
    /// The bytes of the string, exactly as supplied at construction.
    raw: Vec<u8>,
    /// Optional alternative bytes for this string (documented by the accessors as native MBCS encoding).
    alternative: Option<Vec<u8>>,
}
20
21impl ZipString {
22 /// Constructs a new encoded string from its raw bytes and its encoding type.
23 ///
24 /// # Note
25 /// If the provided encoding is [`StringEncoding::Utf8`] but the raw bytes are not valid UTF-8 (ie. a call to
26 /// `std::str::from_utf8()` fails), the encoding is defaulted back to [`StringEncoding::Raw`].
27 pub fn new(raw: Vec<u8>, mut encoding: StringEncoding) -> Self {
28 if let StringEncoding::Utf8 = encoding {
29 if std::str::from_utf8(&raw).is_err() {
30 encoding = StringEncoding::Raw;
31 }
32 }
33
34 Self { encoding, raw, alternative: None }
35 }
36
37 /// Constructs a new encoded string from utf-8 data, with an alternative in native MBCS encoding.
38 pub fn new_with_alternative(utf8: String, alternative: Vec<u8>) -> Self {
39 Self { encoding: StringEncoding::Utf8, raw: utf8.into_bytes(), alternative: Some(alternative) }
40 }
41
42 /// Returns the raw bytes for this string.
43 pub fn as_bytes(&self) -> &[u8] {
44 &self.raw
45 }
46
47 /// Returns the encoding type for this string.
48 pub fn encoding(&self) -> StringEncoding {
49 self.encoding
50 }
51
52 /// Returns the alternative bytes (in native MBCS encoding) for this string.
53 pub fn alternative(&self) -> Option<&[u8]> {
54 self.alternative.as_deref()
55 }
56
57 /// Returns the raw bytes converted into a string slice.
58 ///
59 /// # Note
60 /// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`].
61 pub fn as_str(&self) -> Result<&str> {
62 if !matches!(self.encoding, StringEncoding::Utf8) {
63 return Err(ZipError::StringNotUtf8);
64 }
65
66 // SAFETY:
67 // "The bytes passed in must be valid UTF-8.'
68 //
69 // This function will error if self.encoding is not StringEncoding::Utf8.
70 //
71 // self.encoding is only ever StringEncoding::Utf8 if this variant was provided to the constructor AND the
72 // call to `std::str::from_utf8()` within the constructor succeeded. Mutable access to the inner vector is
73 // never given and no method implemented on this type mutates the inner vector.
74
75 Ok(unsafe { std::str::from_utf8_unchecked(&self.raw) })
76 }
77
78 /// Returns the raw bytes converted to an owned string.
79 ///
80 /// # Note
81 /// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`].
82 pub fn into_string(self) -> Result<String> {
83 if !matches!(self.encoding, StringEncoding::Utf8) {
84 return Err(ZipError::StringNotUtf8);
85 }
86
87 // SAFETY: See above.
88 Ok(unsafe { String::from_utf8_unchecked(self.raw) })
89 }
90
91 /// Returns the alternative bytes (in native MBCS encoding) converted to the owned.
92 pub fn into_alternative(self) -> Option<Vec<u8>> {
93 self.alternative
94 }
95
96 /// Returns whether this string is encoded as utf-8 without an alternative.
97 pub fn is_utf8_without_alternative(&self) -> bool {
98 matches!(self.encoding, StringEncoding::Utf8) && self.alternative.is_none()
99 }
100}
101
102impl From<String> for ZipString {
103 fn from(value: String) -> Self {
104 Self { encoding: StringEncoding::Utf8, raw: value.into_bytes(), alternative: None }
105 }
106}
107
108impl From<&str> for ZipString {
109 fn from(value: &str) -> Self {
110 Self { encoding: StringEncoding::Utf8, raw: value.as_bytes().to_vec(), alternative: None }
111 }
112}