1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::error::{Result, ZipError};
/// A string encoding supported by this crate.
///
/// Equality is derived so that callers can compare encodings directly (`==`, `assert_eq!`) instead of having to
/// pattern-match on the variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringEncoding {
    /// The bytes are valid UTF-8 and may be viewed as a `str`.
    Utf8,
    /// The bytes are kept as-is and are not assumed to be valid UTF-8.
    Raw,
}
/// A string wrapper for handling different encodings.
///
/// Stores the bytes of a string together with the [`StringEncoding`] they are interpreted as, plus an optional
/// alternative representation of the same string in native MBCS encoding.
#[derive(Debug, Clone)]
pub struct ZipString {
/// How `raw` is to be interpreted.
encoding: StringEncoding,
/// The string's bytes. Must be valid UTF-8 whenever `encoding` is [`StringEncoding::Utf8`]; the unchecked
/// conversions in `as_str()` and `into_string()` rely on this.
raw: Vec<u8>,
/// Alternative bytes in native MBCS encoding, present only when supplied via `new_with_alternative()`.
alternative: Option<Vec<u8>>,
}
impl ZipString {
    /// Builds a string from raw bytes tagged with the encoding they are claimed to be in.
    ///
    /// # Note
    /// A claim of [`StringEncoding::Utf8`] is checked with `std::str::from_utf8()`. If the bytes fail that check,
    /// the string is stored as [`StringEncoding::Raw`] instead of being rejected.
    pub fn new(raw: Vec<u8>, mut encoding: StringEncoding) -> Self {
        let claims_utf8 = matches!(encoding, StringEncoding::Utf8);

        // Downgrade rather than fail: callers still get the bytes, just without the UTF-8 guarantee.
        if claims_utf8 && std::str::from_utf8(&raw).is_err() {
            encoding = StringEncoding::Raw;
        }

        Self { encoding, raw, alternative: None }
    }

    /// Builds a UTF-8 string that also carries an alternative representation in native MBCS encoding.
    pub fn new_with_alternative(utf8: String, alternative: Vec<u8>) -> Self {
        // A `String` is always valid UTF-8, so no validation is needed before tagging it as such.
        let raw = utf8.into_bytes();

        Self { encoding: StringEncoding::Utf8, raw, alternative: Some(alternative) }
    }

    /// Borrows the raw bytes of this string, whatever its encoding.
    pub fn as_bytes(&self) -> &[u8] {
        self.raw.as_slice()
    }

    /// Reports the encoding this string is stored as.
    pub fn encoding(&self) -> StringEncoding {
        self.encoding
    }

    /// Borrows the alternative bytes (in native MBCS encoding), if any were supplied.
    pub fn alternative(&self) -> Option<&[u8]> {
        self.alternative.as_deref()
    }

    /// Borrows the raw bytes as a string slice.
    ///
    /// # Errors
    /// Returns [`ZipError::StringNotUtf8`] unless the encoding is [`StringEncoding::Utf8`].
    pub fn as_str(&self) -> Result<&str> {
        match self.encoding {
            // SAFETY:
            // `from_utf8_unchecked` requires that the bytes passed in are valid UTF-8.
            //
            // This arm is only reached when self.encoding is StringEncoding::Utf8. Every constructor in this file
            // that yields that encoding either validates the bytes with `std::str::from_utf8()` (`new`) or takes
            // them from a `String`/`&str` (`new_with_alternative` and the `From` conversions). Mutable access to
            // the inner vector is never given and no method implemented on this type mutates it.
            StringEncoding::Utf8 => Ok(unsafe { std::str::from_utf8_unchecked(&self.raw) }),
            StringEncoding::Raw => Err(ZipError::StringNotUtf8),
        }
    }

    /// Consumes this value and returns the raw bytes as an owned string.
    ///
    /// # Errors
    /// Returns [`ZipError::StringNotUtf8`] unless the encoding is [`StringEncoding::Utf8`].
    pub fn into_string(self) -> Result<String> {
        match self.encoding {
            // SAFETY: `raw` is valid UTF-8 whenever the encoding is StringEncoding::Utf8, for the same reasons
            // given in `as_str()`.
            StringEncoding::Utf8 => Ok(unsafe { String::from_utf8_unchecked(self.raw) }),
            StringEncoding::Raw => Err(ZipError::StringNotUtf8),
        }
    }

    /// Consumes this value and returns the owned alternative bytes (in native MBCS encoding), if any.
    pub fn into_alternative(self) -> Option<Vec<u8>> {
        let Self { alternative, .. } = self;
        alternative
    }

    /// Returns whether this string is encoded as UTF-8 and carries no alternative.
    pub fn is_utf8_without_alternative(&self) -> bool {
        matches!((self.encoding, &self.alternative), (StringEncoding::Utf8, None))
    }
}
impl From<String> for ZipString {
fn from(value: String) -> Self {
Self { encoding: StringEncoding::Utf8, raw: value.into_bytes(), alternative: None }
}
}
impl From<&str> for ZipString {
fn from(value: &str) -> Self {
Self { encoding: StringEncoding::Utf8, raw: value.as_bytes().to_vec(), alternative: None }
}
}