#[cfg(feature = "mmap")]
use std::fs::File;
use std::io::Read;
#[cfg(feature = "mmap")]
use std::path::Path;
use crate::error::{Result, ZiporaError};
use crate::io::var_int::VarInt;
#[cfg(feature = "mmap")]
use memmap2::Mmap;
/// Sequential source of binary-encoded values.
///
/// Implementors supply the primitive reads (`read_u8` through `read_bytes`,
/// plus `skip`); the provided methods build owned buffers and UTF-8 strings
/// on top of them. Every implementation in this file decodes multi-byte
/// integers as little-endian.
pub trait DataInput {
    /// Reads one byte and advances past it.
    fn read_u8(&mut self) -> Result<u8>;

    /// Reads a 16-bit unsigned integer and advances past it.
    fn read_u16(&mut self) -> Result<u16>;

    /// Reads a 32-bit unsigned integer and advances past it.
    fn read_u32(&mut self) -> Result<u32>;

    /// Reads a 64-bit unsigned integer and advances past it.
    fn read_u64(&mut self) -> Result<u64>;

    /// Reads a variable-length encoded unsigned integer.
    ///
    /// The implementations in this file delegate to `VarInt::read_from`.
    fn read_var_int(&mut self) -> Result<u64>;

    /// Fills `buf` completely from the input.
    ///
    /// # Errors
    ///
    /// Returns an error when the input cannot supply `buf.len()` bytes.
    fn read_bytes(&mut self, buf: &mut [u8]) -> Result<()>;

    /// Reads exactly `len` bytes into a freshly allocated vector.
    ///
    /// The whole buffer is allocated before any byte is read, so callers that
    /// take `len` from untrusted input should bound it first.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`DataInput::read_bytes`].
    fn read_vec(&mut self, len: usize) -> Result<Vec<u8>> {
        let mut buf = vec![0u8; len];
        self.read_bytes(&mut buf)?;
        Ok(buf)
    }

    /// Reads a var-int length followed by that many bytes.
    ///
    /// # Errors
    ///
    /// Returns an invalid-data error when the decoded length cannot be
    /// represented as `usize` on the current target, and propagates any read
    /// error from the underlying input.
    fn read_length_prefixed_bytes(&mut self) -> Result<Vec<u8>> {
        let raw_len = self.read_var_int()?;
        // A plain `as usize` cast would silently truncate the length on
        // targets where `usize` is narrower than 64 bits and then read the
        // wrong number of bytes; reject such prefixes instead. The fully
        // qualified path keeps this independent of the prelude edition.
        let len = <usize as std::convert::TryFrom<u64>>::try_from(raw_len).map_err(|_| {
            ZiporaError::invalid_data(format!(
                "Length prefix {} does not fit in usize",
                raw_len
            ))
        })?;
        self.read_vec(len)
    }

    /// Reads `len` bytes and interprets them as a UTF-8 string.
    ///
    /// # Errors
    ///
    /// Returns an invalid-data error when the bytes are not valid UTF-8, and
    /// propagates any read error.
    fn read_string(&mut self, len: usize) -> Result<String> {
        let bytes = self.read_vec(len)?;
        String::from_utf8(bytes)
            .map_err(|e| ZiporaError::invalid_data(format!("Invalid UTF-8 string: {}", e)))
    }

    /// Reads a var-int length followed by that many bytes of UTF-8 text.
    ///
    /// # Errors
    ///
    /// Returns an invalid-data error when the payload is not valid UTF-8, and
    /// propagates any error from [`DataInput::read_length_prefixed_bytes`].
    fn read_length_prefixed_string(&mut self) -> Result<String> {
        let bytes = self.read_length_prefixed_bytes()?;
        String::from_utf8(bytes)
            .map_err(|e| ZiporaError::invalid_data(format!("Invalid UTF-8 string: {}", e)))
    }

    /// Advances past `n` bytes without returning them.
    fn skip(&mut self, n: usize) -> Result<()>;

    /// Current offset from the start of the input, if the implementation
    /// tracks one. The default reports `None`.
    fn position(&self) -> Option<u64> {
        None
    }

    /// Whether more bytes are available, if the implementation can tell.
    /// The default reports `None`.
    fn has_remaining(&self) -> Option<bool> {
        None
    }
}
/// Cursor over a borrowed byte slice.
///
/// `position` is the index of the next unread byte in `data`.
pub struct SliceDataInput<'a> {
    data: &'a [u8],
    position: usize,
}

impl<'a> SliceDataInput<'a> {
    /// Wraps `data` with the cursor placed on its first byte.
    pub fn new(data: &'a [u8]) -> Self {
        let position = 0;
        Self { data, position }
    }

    /// Index of the next unread byte.
    pub fn pos(&self) -> usize {
        self.position
    }

    /// Number of bytes between the cursor and the end of the slice.
    pub fn remaining(&self) -> usize {
        let total = self.data.len();
        total.saturating_sub(self.position)
    }

    /// Returns `true` while at least one unread byte is left.
    pub fn has_more(&self) -> bool {
        self.data.len() > self.position
    }

    /// The unread tail of the slice, borrowed for the slice's own lifetime
    /// rather than for the lifetime of `self`.
    pub fn remaining_slice(&self) -> &'a [u8] {
        let consumed = self.position;
        &self.data[consumed..]
    }
}
/// Reads values directly out of the borrowed slice.
///
/// Multi-byte integers are decoded as little-endian. Every method checks the
/// number of unread bytes before touching the cursor, so a failed call leaves
/// the position unchanged.
impl<'a> DataInput for SliceDataInput<'a> {
    /// Reads the byte under the cursor.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when the cursor is already at the end.
    fn read_u8(&mut self) -> Result<u8> {
        match self.data.get(self.position) {
            Some(&value) => {
                self.position += 1;
                Ok(value)
            }
            None => Err(ZiporaError::io_error("Unexpected end of data")),
        }
    }

    /// Reads a little-endian `u16`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than 2 bytes remain.
    fn read_u16(&mut self) -> Result<u16> {
        if self.data.len().saturating_sub(self.position) < 2 {
            return Err(ZiporaError::io_error("Unexpected end of data"));
        }
        let bytes = &self.data[self.position..self.position + 2];
        self.position += 2;
        Ok(u16::from_le_bytes([bytes[0], bytes[1]]))
    }

    /// Reads a little-endian `u32`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than 4 bytes remain.
    fn read_u32(&mut self) -> Result<u32> {
        if self.data.len().saturating_sub(self.position) < 4 {
            return Err(ZiporaError::io_error("Unexpected end of data"));
        }
        let bytes = &self.data[self.position..self.position + 4];
        self.position += 4;
        Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
    }

    /// Reads a little-endian `u64`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than 8 bytes remain.
    fn read_u64(&mut self) -> Result<u64> {
        if self.data.len().saturating_sub(self.position) < 8 {
            return Err(ZiporaError::io_error("Unexpected end of data"));
        }
        let bytes = &self.data[self.position..self.position + 8];
        self.position += 8;
        Ok(u64::from_le_bytes([
            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
        ]))
    }

    /// Decodes a variable-length integer by delegating to `VarInt::read_from`.
    fn read_var_int(&mut self) -> Result<u64> {
        VarInt::read_from(self)
    }

    /// Copies the next `buf.len()` bytes into `buf`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than `buf.len()` bytes remain; `buf`
    /// is left untouched in that case.
    fn read_bytes(&mut self, buf: &mut [u8]) -> Result<()> {
        let wanted = buf.len();
        if wanted > self.data.len().saturating_sub(self.position) {
            return Err(ZiporaError::io_error("Unexpected end of data"));
        }
        let end = self.position + wanted;
        buf.copy_from_slice(&self.data[self.position..end]);
        self.position = end;
        Ok(())
    }

    /// Moves the cursor forward by `n` bytes.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than `n` bytes remain.
    fn skip(&mut self, n: usize) -> Result<()> {
        // `n` is caller-controlled and may be as large as `usize::MAX`, so
        // `position + n` can overflow: that panics in debug builds and wraps
        // in release builds, letting the check pass and moving the cursor
        // backwards. Comparing against the unread byte count cannot overflow.
        let unread = self.data.len().saturating_sub(self.position);
        if n > unread {
            return Err(ZiporaError::io_error("Cannot skip past end of data"));
        }
        self.position += n;
        Ok(())
    }

    /// Always `Some`: the cursor index widened to `u64`.
    fn position(&self) -> Option<u64> {
        Some(self.position as u64)
    }

    /// Always `Some`: `true` while unread bytes are left.
    fn has_remaining(&self) -> Option<bool> {
        Some(self.position < self.data.len())
    }
}
/// Adapter that tracks how many bytes have been consumed from a wrapped
/// reader.
///
/// `position` starts at zero and is only advanced by this wrapper's own read
/// and skip operations.
pub struct ReaderDataInput<R> {
    reader: R,
    position: u64,
}

impl<R: Read> ReaderDataInput<R> {
    /// Wraps `reader`, starting the byte count at zero.
    pub fn new(reader: R) -> Self {
        let position = 0;
        Self { reader, position }
    }

    /// Number of bytes consumed through this wrapper so far.
    pub fn pos(&self) -> u64 {
        self.position
    }

    /// Consumes the wrapper and hands back the underlying reader.
    pub fn into_inner(self) -> R {
        let Self { reader, .. } = self;
        reader
    }
}
/// Reads values from the wrapped reader with `read_exact`, decoding
/// multi-byte integers as little-endian and counting the bytes consumed.
///
/// The byte count is advanced only after a call succeeds.
impl<R: Read> DataInput for ReaderDataInput<R> {
    /// Reads a single byte.
    fn read_u8(&mut self) -> Result<u8> {
        let mut byte = [0u8; 1];
        match self.reader.read_exact(&mut byte) {
            Ok(()) => {
                self.position += 1;
                Ok(byte[0])
            }
            Err(e) => Err(ZiporaError::io_error(format!("Failed to read u8: {}", e))),
        }
    }

    /// Reads a little-endian `u16`.
    fn read_u16(&mut self) -> Result<u16> {
        let mut raw = [0u8; 2];
        match self.reader.read_exact(&mut raw) {
            Ok(()) => {
                self.position += 2;
                Ok(u16::from_le_bytes(raw))
            }
            Err(e) => Err(ZiporaError::io_error(format!("Failed to read u16: {}", e))),
        }
    }

    /// Reads a little-endian `u32`.
    fn read_u32(&mut self) -> Result<u32> {
        let mut raw = [0u8; 4];
        match self.reader.read_exact(&mut raw) {
            Ok(()) => {
                self.position += 4;
                Ok(u32::from_le_bytes(raw))
            }
            Err(e) => Err(ZiporaError::io_error(format!("Failed to read u32: {}", e))),
        }
    }

    /// Reads a little-endian `u64`.
    fn read_u64(&mut self) -> Result<u64> {
        let mut raw = [0u8; 8];
        match self.reader.read_exact(&mut raw) {
            Ok(()) => {
                self.position += 8;
                Ok(u64::from_le_bytes(raw))
            }
            Err(e) => Err(ZiporaError::io_error(format!("Failed to read u64: {}", e))),
        }
    }

    /// Decodes a variable-length integer by delegating to `VarInt::read_from`.
    fn read_var_int(&mut self) -> Result<u64> {
        VarInt::read_from(self)
    }

    /// Fills `buf` completely from the reader.
    fn read_bytes(&mut self, buf: &mut [u8]) -> Result<()> {
        if let Err(e) = self.reader.read_exact(buf) {
            return Err(ZiporaError::io_error(format!("Failed to read bytes: {}", e)));
        }
        self.position += buf.len() as u64;
        Ok(())
    }

    /// Discards `n` bytes by reading them into a scratch buffer of at most
    /// 8192 bytes, one chunk at a time.
    ///
    /// The byte count is advanced only once all `n` bytes have been read.
    fn skip(&mut self, n: usize) -> Result<()> {
        let mut scratch = vec![0u8; n.min(8192)];
        let mut left = n;
        while left != 0 {
            let chunk = left.min(scratch.len());
            if let Err(e) = self.reader.read_exact(&mut scratch[..chunk]) {
                return Err(ZiporaError::io_error(format!("Failed to skip bytes: {}", e)));
            }
            left -= chunk;
        }
        self.position += n as u64;
        Ok(())
    }

    /// Always `Some`: the number of bytes consumed so far.
    fn position(&self) -> Option<u64> {
        Some(self.position)
    }
}
/// Cursor over a memory-mapped file.
///
/// `position` is the index of the next unread byte within the mapping.
#[cfg(feature = "mmap")]
pub struct MmapDataInput {
    mmap: Mmap,
    position: usize,
}

#[cfg(feature = "mmap")]
impl MmapDataInput {
    /// Opens the file at `path` and maps it, with the cursor at offset zero.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when the file cannot be opened or mapped.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
        let file = match File::open(path) {
            Ok(handle) => handle,
            Err(e) => {
                return Err(ZiporaError::io_error(format!("Failed to open file: {}", e)));
            }
        };
        // SAFETY: `Mmap::map` is unsafe because the mapped bytes can change
        // if the underlying file is modified or truncated while mapped.
        // NOTE(review): nothing in this function prevents that; it presumes
        // the file is not mutated concurrently - confirm against callers.
        let mapped = unsafe { Mmap::map(&file) };
        let mmap = mapped
            .map_err(|e| ZiporaError::io_error(format!("Failed to memory map file: {}", e)))?;
        Ok(Self { mmap, position: 0 })
    }

    /// Wraps an existing mapping, with the cursor at offset zero.
    pub fn from_mmap(mmap: Mmap) -> Self {
        let position = 0;
        Self { mmap, position }
    }

    /// Index of the next unread byte.
    pub fn pos(&self) -> usize {
        self.position
    }

    /// Total length of the mapping in bytes.
    #[inline]
    pub fn len(&self) -> usize {
        self.mmap.len()
    }

    /// Returns `true` when the mapping contains no bytes.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.mmap.is_empty()
    }

    /// Number of bytes between the cursor and the end of the mapping.
    pub fn remaining(&self) -> usize {
        let total = self.len();
        total.saturating_sub(self.position)
    }

    /// The unread tail of the mapping.
    pub fn remaining_slice(&self) -> &[u8] {
        let consumed = self.position;
        &self.mmap[consumed..]
    }

    /// The whole mapping, regardless of the cursor position.
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        &self.mmap
    }
}
/// Reads values directly out of the mapped bytes.
///
/// Multi-byte integers are decoded as little-endian. Every method checks the
/// number of unread bytes before touching the cursor, so a failed call leaves
/// the position unchanged.
#[cfg(feature = "mmap")]
impl DataInput for MmapDataInput {
    /// Reads the byte under the cursor.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when the cursor is already at the end.
    fn read_u8(&mut self) -> Result<u8> {
        if self.position >= self.mmap.len() {
            return Err(ZiporaError::io_error("Unexpected end of data"));
        }
        let value = self.mmap[self.position];
        self.position += 1;
        Ok(value)
    }

    /// Reads a little-endian `u16`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than 2 bytes remain.
    fn read_u16(&mut self) -> Result<u16> {
        if self.mmap.len().saturating_sub(self.position) < 2 {
            return Err(ZiporaError::io_error("Unexpected end of data"));
        }
        let bytes = &self.mmap[self.position..self.position + 2];
        self.position += 2;
        Ok(u16::from_le_bytes([bytes[0], bytes[1]]))
    }

    /// Reads a little-endian `u32`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than 4 bytes remain.
    fn read_u32(&mut self) -> Result<u32> {
        if self.mmap.len().saturating_sub(self.position) < 4 {
            return Err(ZiporaError::io_error("Unexpected end of data"));
        }
        let bytes = &self.mmap[self.position..self.position + 4];
        self.position += 4;
        Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
    }

    /// Reads a little-endian `u64`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than 8 bytes remain.
    fn read_u64(&mut self) -> Result<u64> {
        if self.mmap.len().saturating_sub(self.position) < 8 {
            return Err(ZiporaError::io_error("Unexpected end of data"));
        }
        let bytes = &self.mmap[self.position..self.position + 8];
        self.position += 8;
        Ok(u64::from_le_bytes([
            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
        ]))
    }

    /// Decodes a variable-length integer by delegating to `VarInt::read_from`.
    fn read_var_int(&mut self) -> Result<u64> {
        VarInt::read_from(self)
    }

    /// Copies the next `buf.len()` bytes into `buf`.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than `buf.len()` bytes remain; `buf`
    /// is left untouched in that case.
    fn read_bytes(&mut self, buf: &mut [u8]) -> Result<()> {
        let wanted = buf.len();
        if wanted > self.mmap.len().saturating_sub(self.position) {
            return Err(ZiporaError::io_error("Unexpected end of data"));
        }
        let end = self.position + wanted;
        buf.copy_from_slice(&self.mmap[self.position..end]);
        self.position = end;
        Ok(())
    }

    /// Moves the cursor forward by `n` bytes.
    ///
    /// # Errors
    ///
    /// Returns an I/O error when fewer than `n` bytes remain.
    fn skip(&mut self, n: usize) -> Result<()> {
        // `n` is caller-controlled and may be as large as `usize::MAX`, so
        // `position + n` can overflow: that panics in debug builds and wraps
        // in release builds, letting the check pass and moving the cursor
        // backwards. Comparing against the unread byte count cannot overflow.
        let unread = self.mmap.len().saturating_sub(self.position);
        if n > unread {
            return Err(ZiporaError::io_error("Cannot skip past end of data"));
        }
        self.position += n;
        Ok(())
    }

    /// Always `Some`: the cursor index widened to `u64`.
    fn position(&self) -> Option<u64> {
        Some(self.position as u64)
    }

    /// Always `Some`: `true` while unread bytes are left.
    fn has_remaining(&self) -> Option<bool> {
        Some(self.position < self.mmap.len())
    }
}
/// Creates a [`SliceDataInput`] over `data`.
///
/// Shorthand for [`SliceDataInput::new`].
pub fn from_slice(data: &[u8]) -> SliceDataInput<'_> {
SliceDataInput::new(data)
}
/// Creates a [`ReaderDataInput`] that takes ownership of `reader`.
///
/// Shorthand for [`ReaderDataInput::new`].
pub fn from_reader<R: Read>(reader: R) -> ReaderDataInput<R> {
ReaderDataInput::new(reader)
}
/// Opens the file at `path` as a memory-mapped [`MmapDataInput`].
///
/// Shorthand for [`MmapDataInput::open`]; only compiled when the `mmap`
/// feature is enabled.
///
/// # Errors
///
/// Returns whatever error [`MmapDataInput::open`] reports.
#[cfg(feature = "mmap")]
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<MmapDataInput> {
MmapDataInput::open(path)
}
/// Unit tests for the `DataInput` implementations and convenience functions.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;
    // Only the mmap test writes a temporary file. Gating these imports the
    // same way as that test avoids unused-import warnings when the `mmap`
    // feature is disabled.
    #[cfg(feature = "mmap")]
    use std::io::Write;
    #[cfg(feature = "mmap")]
    use tempfile::NamedTempFile;

    /// Walks a slice with every fixed-width read and checks the cursor
    /// bookkeeping at each step, including the end-of-data error.
    #[test]
    fn test_slice_data_input_basic() {
        let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
        let mut input = SliceDataInput::new(&data);

        assert_eq!(input.position(), Some(0));
        assert_eq!(input.has_remaining(), Some(true));

        assert_eq!(input.read_u8().unwrap(), 1);
        assert_eq!(input.pos(), 1);

        assert_eq!(input.read_u16().unwrap(), u16::from_le_bytes([2, 3]));
        assert_eq!(input.pos(), 3);

        assert_eq!(input.read_u32().unwrap(), u32::from_le_bytes([4, 5, 6, 7]));
        assert_eq!(input.pos(), 7);

        let mut buf = [0u8; 2];
        input.read_bytes(&mut buf).unwrap();
        assert_eq!(buf, [8, 9]);

        assert_eq!(input.remaining(), 1);
        assert_eq!(input.read_u8().unwrap(), 10);
        assert_eq!(input.remaining(), 0);
        assert_eq!(input.has_remaining(), Some(false));

        // Reading past the end must fail rather than panic.
        assert!(input.read_u8().is_err());
    }

    /// Skipping moves the cursor and refuses to run past the end.
    #[test]
    fn test_slice_data_input_skip() {
        let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
        let mut input = SliceDataInput::new(&data);

        input.skip(3).unwrap();
        assert_eq!(input.pos(), 3);
        assert_eq!(input.read_u8().unwrap(), 4);

        input.skip(2).unwrap();
        assert_eq!(input.read_u8().unwrap(), 7);

        assert!(input.skip(10).is_err());
    }

    /// The reader adapter counts consumed bytes across reads and skips.
    #[test]
    fn test_reader_data_input() {
        let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
        let cursor = Cursor::new(data);
        let mut input = ReaderDataInput::new(cursor);

        assert_eq!(input.position(), Some(0));

        assert_eq!(input.read_u8().unwrap(), 1);
        assert_eq!(input.pos(), 1);

        assert_eq!(input.read_u16().unwrap(), u16::from_le_bytes([2, 3]));
        assert_eq!(input.pos(), 3);

        let mut buf = [0u8; 3];
        input.read_bytes(&mut buf).unwrap();
        assert_eq!(buf, [4, 5, 6]);
        assert_eq!(input.pos(), 6);

        input.skip(2).unwrap();
        assert_eq!(input.pos(), 8);
        assert_eq!(input.read_u8().unwrap(), 9);
    }

    /// Var-int values written by `VarInt::write_to` round-trip through
    /// `read_var_int`.
    #[test]
    fn test_var_int_encoding() {
        let test_values = [0, 127, 128, 16383, 16384, 2097151, 2097152, u64::MAX];

        for &value in &test_values {
            let mut encoded = Vec::new();
            VarInt::write_to(&mut encoded, value).unwrap();

            let mut input = SliceDataInput::new(&encoded);
            let decoded = input.read_var_int().unwrap();
            assert_eq!(value, decoded, "Failed for value {}", value);
        }
    }

    /// Fixed-length and length-prefixed string reads preserve multi-byte
    /// UTF-8 text.
    #[test]
    fn test_string_operations() {
        // Mixes 1-, 3- and 4-byte UTF-8 sequences.
        let test_string = "Hello, 世界! 🦀";
        let bytes = test_string.as_bytes();

        let mut input = SliceDataInput::new(bytes);
        let decoded = input.read_string(bytes.len()).unwrap();
        assert_eq!(test_string, decoded);

        let mut encoded = Vec::new();
        VarInt::write_to(&mut encoded, bytes.len() as u64).unwrap();
        encoded.extend_from_slice(bytes);

        let mut input = SliceDataInput::new(&encoded);
        let decoded = input.read_length_prefixed_string().unwrap();
        assert_eq!(test_string, decoded);
    }

    /// A var-int length prefix followed by the payload reads back intact.
    #[test]
    fn test_length_prefixed_bytes() {
        let data = b"test data";
        let mut encoded = Vec::new();
        VarInt::write_to(&mut encoded, data.len() as u64).unwrap();
        encoded.extend_from_slice(data);

        let mut input = SliceDataInput::new(&encoded);
        let decoded = input.read_length_prefixed_bytes().unwrap();
        assert_eq!(data, &decoded[..]);
    }

    /// Maps a temporary file and reads it back through the mmap input.
    #[cfg(feature = "mmap")]
    #[test]
    fn test_mmap_data_input() {
        let mut temp_file = NamedTempFile::new().unwrap();
        let data = b"Hello, memory mapped world!";
        temp_file.write_all(data).unwrap();
        temp_file.flush().unwrap();

        let mut input = MmapDataInput::open(temp_file.path()).unwrap();
        assert_eq!(input.len(), data.len());
        assert!(!input.is_empty());
        assert_eq!(input.remaining(), data.len());

        let first_word = input.read_vec(5).unwrap();
        assert_eq!(&first_word, b"Hello");

        // Step over the ", " separator.
        input.skip(2).unwrap();
        let second_word = input.read_vec(6).unwrap();
        assert_eq!(&second_word, b"memory");

        let remaining_data = input.remaining_slice();
        assert_eq!(remaining_data, b" mapped world!");
    }

    /// The free-function constructors behave like the `new` constructors.
    #[test]
    fn test_convenience_functions() {
        let data = [1, 2, 3, 4];
        let mut input = from_slice(&data);
        assert_eq!(input.read_u32().unwrap(), u32::from_le_bytes([1, 2, 3, 4]));

        let cursor = Cursor::new(vec![5, 6, 7, 8]);
        let mut input = from_reader(cursor);
        assert_eq!(input.read_u32().unwrap(), u32::from_le_bytes([5, 6, 7, 8]));
    }
}