//! Writing and reading of `.npz` archives: zip files whose entries are `.npy` arrays.
use crate::{
ReadNpyError, ReadNpyExt, ReadableElement, WritableElement, WriteNpyError, WriteNpyExt,
};
use ndarray::prelude::*;
use ndarray::{Data, DataOwned};
use std::error::Error;
use std::fmt;
use std::io::{BufWriter, Read, Seek, Write};
use zip::result::ZipError;
use zip::write::SimpleFileOptions;
use zip::{CompressionMethod, ZipArchive, ZipWriter};
/// An error writing a `.npz` file.
///
/// This is the error type returned by [`NpzWriter::add_array`] and
/// [`NpzWriter::finish`].
#[derive(Debug)]
pub enum WriteNpzError {
/// An error caused by the zip file.
///
/// I/O errors raised while flushing the underlying writer in
/// [`NpzWriter::finish`] are also reported through this variant, wrapped in
/// a [`ZipError`].
Zip(ZipError),
/// An error caused by writing an inner `.npy` file.
Npy(WriteNpyError),
}
impl Error for WriteNpzError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
WriteNpzError::Zip(err) => Some(err),
WriteNpzError::Npy(err) => Some(err),
}
}
}
impl fmt::Display for WriteNpzError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
WriteNpzError::Zip(err) => write!(f, "zip file error: {}", err),
WriteNpzError::Npy(err) => write!(f, "error writing npy file to npz archive: {}", err),
}
}
}
impl From<ZipError> for WriteNpzError {
fn from(err: ZipError) -> WriteNpzError {
WriteNpzError::Zip(err)
}
}
impl From<WriteNpyError> for WriteNpzError {
fn from(err: WriteNpyError) -> WriteNpzError {
WriteNpzError::Npy(err)
}
}
/// Writer for `.npz` files.
///
/// Arrays are added one at a time with [`.add_array()`](NpzWriter::add_array),
/// and the archive must be completed with [`.finish()`](NpzWriter::finish) so
/// that any error while finalizing it can be observed.
///
/// Note that the inner [`ZipWriter`] is wrapped in a [`BufWriter`] when
/// writing each array with [`.add_array()`](NpzWriter::add_array). If desired,
/// you could additionally buffer the innermost writer (e.g. the
/// [`File`](std::fs::File) when writing to a file) by wrapping it in a
/// [`BufWriter`]. This may be somewhat beneficial if the arrays are large and
/// have non-standard layouts, but may decrease performance if the arrays have
/// standard or Fortran layout, so it's not recommended without testing to
/// compare.
///
/// # Example
///
/// ```no_run
/// use ndarray::{array, aview0, Array1, Array2};
/// use ndarray_npy::NpzWriter;
/// use std::fs::File;
///
/// let mut npz = NpzWriter::new(File::create("arrays.npz")?);
/// let a: Array2<i32> = array![[1, 2, 3], [4, 5, 6]];
/// let b: Array1<i32> = array![7, 8, 9];
/// npz.add_array("a", &a)?;
/// npz.add_array("b", &b)?;
/// npz.add_array("c", &aview0(&10))?;
/// npz.finish()?;
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
pub struct NpzWriter<W: Write + Seek> {
/// The zip archive writer that every array entry is written into.
zip: ZipWriter<W>,
/// File options (e.g. the compression method) passed to the zip writer for
/// each entry started by `add_array`.
options: SimpleFileOptions,
}
impl<W: Write + Seek> NpzWriter<W> {
/// Create a new `.npz` file without compression. See [`numpy.savez`].
///
/// [`numpy.savez`]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.savez.html
pub fn new(writer: W) -> NpzWriter<W> {
NpzWriter {
zip: ZipWriter::new(writer),
options: SimpleFileOptions::default().compression_method(CompressionMethod::Stored),
}
}
/// Creates a new `.npz` file with compression. See [`numpy.savez_compressed`].
///
/// [`numpy.savez_compressed`]: https://docs.scipy.org/doc/numpy/reference/generated/numpy.savez_compressed.html
#[cfg(feature = "compressed_npz")]
pub fn new_compressed(writer: W) -> NpzWriter<W> {
NpzWriter {
zip: ZipWriter::new(writer),
options: SimpleFileOptions::default().compression_method(CompressionMethod::Deflated),
}
}
/// Creates a new `.npz` file with the specified options.
///
/// This allows you to use a custom compression method, such as [`CompressionMethod::Zstd`] or
/// set other options.
///
/// Make sure to enable the relevant features of the `zip` crate to use
/// [`CompressionMethod::Zstd`] or other features.
pub fn new_with_options(writer: W, options: SimpleFileOptions) -> NpzWriter<W> {
NpzWriter {
zip: ZipWriter::new(writer),
options,
}
}
/// Adds an array with the specified `name` to the `.npz` file.
///
/// Note that a `.npy` extension will be appended to `name`; this matches NumPy's behavior.
///
/// To write a scalar value, create a zero-dimensional array using [`arr0`](ndarray::arr0) or
/// [`aview0`](ndarray::aview0).
pub fn add_array<N, S, D>(
&mut self,
name: N,
array: &ArrayBase<S, D>,
) -> Result<(), WriteNpzError>
where
N: Into<String>,
S::Elem: WritableElement,
S: Data,
D: Dimension,
{
self.zip.start_file(name.into() + ".npy", self.options)?;
// Buffering when writing individual arrays is beneficial even when the
// underlying writer is `Cursor<Vec<u8>>` instead of a real file. The
// only exception I saw in testing was the "compressed, in-memory
// writer, standard layout case". See
// https://github.com/jturner314/ndarray-npy/issues/50#issuecomment-812802481
// for details.
array.write_npy(BufWriter::new(&mut self.zip))?;
Ok(())
}
/// Calls [`.finish()`](ZipWriter::finish) on the zip file and
/// [`.flush()`](Write::flush) on the writer, and then returns the writer.
///
/// This finishes writing the remaining zip structures and flushes the
/// writer. While dropping will automatically attempt to finish the zip
/// file and (for writers that flush on drop, such as
/// [`BufWriter`](std::io::BufWriter)) flush the writer, any errors that
/// occur during drop will be silently ignored. So, it's necessary to call
/// `.finish()` to properly handle errors.
pub fn finish(self) -> Result<W, WriteNpzError> {
let mut writer = self.zip.finish()?;
writer.flush().map_err(ZipError::from)?;
Ok(writer)
}
}
/// An error reading a `.npz` file.
///
/// This is the error type returned by [`NpzReader::new`], [`NpzReader::names`],
/// [`NpzReader::by_name`], and [`NpzReader::by_index`].
#[derive(Debug)]
pub enum ReadNpzError {
/// An error caused by the zip archive.
///
/// Failures to open the archive or to look up an entry in it are reported
/// through this variant.
Zip(ZipError),
/// An error caused by reading an inner `.npy` file.
Npy(ReadNpyError),
}
impl Error for ReadNpzError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
ReadNpzError::Zip(err) => Some(err),
ReadNpzError::Npy(err) => Some(err),
}
}
}
impl fmt::Display for ReadNpzError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
ReadNpzError::Zip(err) => write!(f, "zip file error: {}", err),
ReadNpzError::Npy(err) => write!(f, "error reading npy file in npz archive: {}", err),
}
}
}
impl From<ZipError> for ReadNpzError {
fn from(err: ZipError) -> ReadNpzError {
ReadNpzError::Zip(err)
}
}
impl From<ReadNpyError> for ReadNpzError {
fn from(err: ReadNpyError) -> ReadNpzError {
ReadNpzError::Npy(err)
}
}
/// Reader for `.npz` files.
///
/// Arrays can be looked up by name with [`.by_name()`](NpzReader::by_name) or
/// by position with [`.by_index()`](NpzReader::by_index); the available names
/// are listed by [`.names()`](NpzReader::names).
///
/// # Example
///
/// ```no_run
/// use ndarray::{Array1, Array2};
/// use ndarray_npy::NpzReader;
/// use std::fs::File;
///
/// let mut npz = NpzReader::new(File::open("arrays.npz")?)?;
/// let a: Array2<i32> = npz.by_name("a")?;
/// let b: Array1<i32> = npz.by_name("b")?;
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
pub struct NpzReader<R: Read + Seek> {
/// The zip archive that the `.npy` entries are read from.
zip: ZipArchive<R>,
}
impl<R: Read + Seek> NpzReader<R> {
/// Creates a new `.npz` file reader.
pub fn new(reader: R) -> Result<NpzReader<R>, ReadNpzError> {
Ok(NpzReader {
zip: ZipArchive::new(reader)?,
})
}
/// Returns `true` iff the `.npz` file doesn't contain any arrays.
pub fn is_empty(&self) -> bool {
self.zip.len() == 0
}
/// Returns the number of arrays in the `.npz` file.
pub fn len(&self) -> usize {
self.zip.len()
}
/// Returns the names of all of the arrays in the file.
///
/// Note that a single ".npy" suffix (if present) will be stripped from each name; this matches
/// NumPy's behavior.
pub fn names(&mut self) -> Result<Vec<String>, ReadNpzError> {
Ok((0..self.zip.len())
.map(|i| {
let file = self.zip.by_index(i)?;
let name = file.name();
let stripped = name.strip_suffix(".npy").unwrap_or(name);
Ok(stripped.to_owned())
})
.collect::<Result<_, ZipError>>()?)
}
/// Reads an array by name.
///
/// Note that this first checks for `name` in the `.npz` file, and if that is not present,
/// checks for `format!("{name}.npy")`. This matches NumPy's behavior.
pub fn by_name<S, D>(&mut self, name: &str) -> Result<ArrayBase<S, D>, ReadNpzError>
where
S::Elem: ReadableElement,
S: DataOwned,
D: Dimension,
{
// TODO: Combine the two cases into a single `let file = match { ... }` once
// https://github.com/rust-lang/rust/issues/47680 is resolved.
match self.zip.by_name(name) {
Ok(file) => return Ok(ArrayBase::<S, D>::read_npy(file)?),
Err(ZipError::FileNotFound) => {}
Err(err) => return Err(err.into()),
};
Ok(ArrayBase::<S, D>::read_npy(
self.zip.by_name(&format!("{name}.npy"))?,
)?)
}
/// Reads an array by index in the `.npz` file.
pub fn by_index<S, D>(&mut self, index: usize) -> Result<ArrayBase<S, D>, ReadNpzError>
where
S::Elem: ReadableElement,
S: DataOwned,
D: Dimension,
{
Ok(ArrayBase::<S, D>::read_npy(self.zip.by_index(index)?)?)
}
}