use crate::{Error, ZipStr};
use std::borrow::Cow;
/// Path bytes exactly as handed to `ZipFilePath::from_bytes`.
///
/// No validation or rewriting has happened yet; the wrapped [`ZipStr`] is only
/// exposed as a byte slice.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct RawPath<'a>(ZipStr<'a>);

impl AsRef<[u8]> for RawPath<'_> {
    /// Views the untouched path bytes.
    #[inline]
    fn as_ref(&self) -> &[u8] {
        let RawPath(raw) = self;
        raw.as_bytes()
    }
}
/// Path text produced by `ZipFilePath::from_str`.
///
/// The text is borrowed from the caller's input when `from_str` did not have to
/// rewrite anything, and owned when it had to build a cleaned-up copy.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct NormalizedPath<'a>(Cow<'a, str>);

impl AsRef<[u8]> for NormalizedPath<'_> {
    /// Views the path as its UTF-8 bytes.
    #[inline]
    fn as_ref(&self) -> &[u8] {
        str::as_bytes(&self.0)
    }
}

impl AsRef<str> for NormalizedPath<'_> {
    /// Views the path as a string slice.
    #[inline]
    fn as_ref(&self) -> &str {
        // `&Cow<str>` deref-coerces to `&str`.
        &self.0
    }
}
/// Owned counterpart of `NormalizedPath`: the path text lives in a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct NormalizedPathBuf(String);

impl AsRef<[u8]> for NormalizedPathBuf {
    /// Views the path as its UTF-8 bytes.
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.0.as_str().as_bytes()
    }
}

impl AsRef<str> for NormalizedPathBuf {
    /// Views the path as a string slice.
    #[inline]
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
/// A file path inside a ZIP archive, parameterised by its representation `R`.
///
/// In this file `R` is one of `RawPath` (unvalidated bytes), `NormalizedPath`
/// (cleaned-up text, possibly borrowed) or `NormalizedPathBuf` (cleaned-up,
/// owned text). `ZipFilePath<()>` only serves as the home of the constructors.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ZipFilePath<R> {
    /// The underlying representation of the path.
    data: R,
}
impl ZipFilePath<()> {
/// Wraps raw path bytes without inspecting or modifying them.
///
/// Use `try_normalize` on the result to obtain validated, cleaned-up text.
#[inline]
pub fn from_bytes(data: &[u8]) -> ZipFilePath<RawPath<'_>> {
    let raw = RawPath(ZipStr::new(data));
    ZipFilePath { data: raw }
}
/// Builds a normalized path from `name`.
///
/// Normalization rules applied:
/// - `\` is turned into `/`;
/// - everything up to and including the last `:` is dropped;
/// - empty components (`//`) and `.` components are removed;
/// - a `..` component removes the component before it, never escaping the root;
/// - leading `/` and `./` are stripped.
///
/// The result borrows from `name` whenever the cleaned-up path is a suffix of
/// the input; otherwise a new string is allocated.
#[inline]
// Named like `FromStr::from_str`, but the result borrows from `name`, which the
// `FromStr` trait cannot express.
#[allow(clippy::should_implement_trait)]
pub fn from_str(mut name: &str) -> ZipFilePath<NormalizedPath<'_>> {
    // Scan (current, previous) byte pairs for anything that cannot be fixed by
    // merely dropping a prefix: a backslash, a colon, `//`, `..` or `/.`.
    let mut prev = 0u8;
    for &byte in name.as_bytes() {
        let needs_rewrite = matches!(
            (byte, prev),
            (b'\\', _) | (b'/', b'/') | (b'.', b'.') | (b'.', b'/') | (b':', _)
        );
        if needs_rewrite {
            return ZipFilePath {
                data: NormalizedPath(Cow::Owned(Self::normalize_alloc(name))),
            };
        }
        prev = byte;
    }

    // Fast path: only a leading `/` or `./` (or a lone `.`) can remain, so
    // borrowing a suffix of the input is enough. A leading `../` cannot occur
    // here because the scan above already diverted every `..` to the slow path.
    loop {
        name = if name == "." {
            // A lone `.` is a current-directory component, which the slow path
            // also drops; keep both paths consistent.
            ""
        } else if let Some(rest) = name.strip_prefix("./") {
            rest
        } else if let Some(rest) = name.strip_prefix('/') {
            rest
        } else {
            return ZipFilePath {
                data: NormalizedPath(Cow::Borrowed(name)),
            };
        };
    }
}
/// Slow path of `from_str`: builds a cleaned-up copy of `s`.
///
/// Steps, in order:
/// 1. every `\` becomes `/`;
/// 2. everything up to and including the last `:` is discarded
///    (`C:/x` becomes `/x`);
/// 3. the remainder is split on `/`; empty and `.` components are dropped and
///    each `..` removes the component before it (or nothing when already at
///    the root);
/// 4. the surviving components are joined with single `/` separators.
///
/// A trailing `/` on the input is kept when at least one component survives,
/// so a directory entry such as `dir\` still ends in `/` after normalization,
/// just like `dir/` does on the borrowing fast path.
fn normalize_alloc(s: &str) -> String {
    let s = s.replace('\\', "/");

    // `split` always yields at least one item, so the default is never used.
    let s = s.split(':').next_back().unwrap_or_default();

    // Remember the directory marker before the split discards it as an empty
    // trailing component.
    let is_dir = s.ends_with('/');

    let mut result = String::with_capacity(s.len());
    for component in s.split('/') {
        match component {
            "" | "." => {}
            ".." => {
                // Cut back to the previous separator; with a single component
                // (no separator) this empties the result.
                let parent_end = result.rfind('/').unwrap_or(0);
                result.truncate(parent_end);
            }
            _ => {
                if !result.is_empty() {
                    result.push('/');
                }
                result.push_str(component);
            }
        }
    }

    if is_dir && !result.is_empty() {
        result.push('/');
    }
    result
}
}
impl<R> ZipFilePath<R>
where
    R: AsRef<[u8]>,
{
    /// Reports whether the path denotes a directory, i.e. its final byte is `/`.
    ///
    /// An empty path is not a directory.
    #[inline]
    pub fn is_dir(&self) -> bool {
        self.data.as_ref().ends_with(b"/")
    }

    /// Length of the path in bytes.
    #[inline]
    pub fn len(&self) -> usize {
        let bytes: &[u8] = self.data.as_ref();
        bytes.len()
    }

    /// Returns `true` when the path holds no bytes at all.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
impl<R> ZipFilePath<R>
where
    R: AsRef<str>,
{
    /// Returns `true` if any character of the path is a backslash or lies
    /// outside the range U+0020 (space) through U+007D (`}`).
    ///
    /// Note that `~` (U+007E) and DEL (U+007F) fall outside that range and
    /// therefore count as needing UTF-8 encoding, as do all control and
    /// non-ASCII characters. An empty path yields `false`.
    pub(crate) fn needs_utf8_encoding(&self) -> bool {
        let text: &str = self.data.as_ref();
        text.chars()
            .any(|ch| ch == '\\' || !(' '..='}').contains(&ch))
    }
}
impl<'a> ZipFilePath<RawPath<'a>> {
#[inline]
pub fn as_bytes(&self) -> &'a [u8] {
self.data.0.as_bytes()
}
#[inline]
pub fn try_normalize(self) -> Result<ZipFilePath<NormalizedPath<'a>>, Error> {
let raw_data = self.data.0;
let name = std::str::from_utf8(raw_data.as_bytes()).map_err(Error::utf8)?;
Ok(ZipFilePath::from_str(name))
}
}
impl AsRef<[u8]> for ZipFilePath<RawPath<'_>> {
    /// Views the raw, unnormalized path bytes.
    #[inline]
    fn as_ref(&self) -> &[u8] {
        // Delegates to `RawPath`'s own `AsRef<[u8]>` implementation.
        self.data.as_ref()
    }
}
impl AsRef<str> for ZipFilePath<NormalizedPath<'_>> {
#[inline]
fn as_ref(&self) -> &str {
self.data.0.as_ref()
}
}
impl AsRef<str> for ZipFilePath<NormalizedPathBuf> {
#[inline]
fn as_ref(&self) -> &str {
self.data.0.as_ref()
}
}
impl From<ZipFilePath<NormalizedPathBuf>> for String {
#[inline]
fn from(path: ZipFilePath<NormalizedPathBuf>) -> Self {
path.data.0
}
}
impl From<ZipFilePath<NormalizedPath<'_>>> for String {
#[inline]
fn from(path: ZipFilePath<NormalizedPath<'_>>) -> Self {
path.data.0.into_owned()
}
}
impl ZipFilePath<NormalizedPath<'_>> {
#[inline]
pub fn as_str(&self) -> &str {
self.data.0.as_ref()
}
#[inline]
pub fn into_owned(self) -> ZipFilePath<NormalizedPathBuf> {
ZipFilePath {
data: NormalizedPathBuf(self.data.0.into_owned()),
}
}
}
impl ZipFilePath<NormalizedPathBuf> {
    /// Returns the owned normalized path as a string slice.
    #[inline]
    pub fn as_str(&self) -> &str {
        self.data.0.as_str()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    // Raw bytes -> normalized text, covering separators, `.`/`..` resolution
    // and drive prefixes.
    #[rstest]
    #[case(b"test.txt", "test.txt")]
    #[case(b"dir/test.txt", "dir/test.txt")]
    #[case(b"dir\\test.txt", "dir/test.txt")]
    #[case(b"dir//test.txt", "dir/test.txt")]
    #[case(b"/test.txt", "test.txt")]
    #[case(b"../test.txt", "test.txt")]
    #[case(b"dir/../test.txt", "test.txt")]
    #[case(b"./test.txt", "test.txt")]
    #[case(b"dir/./test.txt", "dir/test.txt")]
    #[case(b"dir/./../test.txt", "test.txt")]
    #[case(b"dir/sub/../test.txt", "dir/test.txt")]
    #[case(b"dir/../../test.txt", "test.txt")]
    #[case(b"../../../test.txt", "test.txt")]
    #[case(b"a/b/../../test.txt", "test.txt")]
    #[case(b"a/b/c/../../../test.txt", "test.txt")]
    #[case(b"a/b/c/d/../../test.txt", "a/b/test.txt")]
    #[case(b"C:\\hello\\test.txt", "hello/test.txt")]
    #[case(b"C:/hello\\test.txt", "hello/test.txt")]
    #[case(b"C:/hello/test.txt", "hello/test.txt")]
    fn test_zip_path_normalized(#[case] input: &[u8], #[case] expected: &str) {
        let normalized = ZipFilePath::from_bytes(input).try_normalize().unwrap();
        assert_eq!(normalized.as_ref(), expected);
    }

    // Bytes that are not valid UTF-8 must be rejected by `try_normalize`.
    #[rstest]
    #[case(&[0xFF])]
    #[case(&[b't', b'e', b's', b't', 0xFF])]
    fn test_zip_path_normalized_invalid_utf8(#[case] input: &[u8]) {
        let outcome = ZipFilePath::from_bytes(input).try_normalize();
        assert!(outcome.is_err());
    }

    // Characters in U+0020..=U+007D (except backslash) do not require UTF-8
    // encoding; everything else does.
    #[rstest]
    #[case("test.txt", false)]
    #[case("hello_world", false)]
    #[case("file.name.ext", false)]
    #[case("hello!", false)]
    #[case("hello{world}", false)]
    #[case("hello|world", false)]
    #[case("hello`world", false)]
    #[case("hello\"world", false)]
    #[case("hello<world>", false)]
    #[case("hello;world", false)]
    #[case("hello:world", false)]
    #[case("hello^world", false)]
    #[case("hello\u{00A0}world", true)]
    #[case("hello\u{0080}world", true)]
    #[case("hello\u{00FF}world", true)]
    #[case("hello\u{0100}world", true)]
    #[case("hello\u{03B1}world", true)]
    #[case("hello\u{4E00}world", true)]
    #[case("hello\u{1F600}world", true)]
    // `from_str` rewrites the backslash to `/` before the check runs.
    #[case(r"hello\world", false)]
    #[case("hello~world", true)]
    #[case("hello\u{007F}world", true)]
    #[case("hello\u{001F}world", true)]
    #[case("hello\u{0000}world", true)]
    #[case("hello\u{0001}world", true)]
    #[case("hello\u{000A}world", true)]
    #[case("hello\u{000D}world", true)]
    #[case("hello\u{0009}world", true)]
    #[case("", false)]
    #[case(" ", false)]
    #[case("hello\u{007E}world", true)]
    #[case("hello\u{007D}world", false)]
    fn test_needs_utf8_encoding(#[case] input: &str, #[case] expected: bool) {
        let path = ZipFilePath::from_str(input);
        let actual = path.needs_utf8_encoding();
        assert_eq!(actual, expected, "Failed for input: {}", input);
    }

    // A normalized path can be bound to a local and queried afterwards.
    #[test]
    fn test_path_lifetime_test() {
        let path = ZipFilePath::from_bytes(b"test.txt")
            .try_normalize()
            .unwrap();
        assert_eq!(path.as_ref(), "test.txt");
        assert_eq!(path.len(), 8);
    }

    // `as_bytes` must hand out the input's lifetime, so the slice can outlive
    // the `ZipFilePath` value that is consumed by the helper below.
    #[test]
    fn test_raw_path_lifetime_preservation() {
        use std::str::Utf8Error;

        fn file_path_utf8<'a>(path: ZipFilePath<RawPath<'a>>) -> Result<&'a str, Utf8Error> {
            std::str::from_utf8(path.as_bytes())
        }

        let raw = ZipFilePath::from_bytes(b"test/file.txt");
        let text = file_path_utf8(raw).unwrap();
        assert_eq!(text, "test/file.txt");
    }
}