pub mod byte_slice_input;
pub mod checksum;
pub mod checksum_input;
pub mod fs;
pub mod memory;
pub mod mmap;
pub use checksum::CRC32;
pub use fs::FSDirectory;
pub use memory::{MemoryDirectory, MemoryIndexOutput};
pub use mmap::MmapDirectory;
pub use crate::codecs::lucene90::compound_reader::CompoundDirectory;
use std::collections::HashMap;
use std::io;
/// A named byte buffer.
///
/// NOTE(review): presumably `data` holds the full contents of the file called
/// `name` — confirm against the code that constructs these values.
#[derive(Clone, Debug)]
pub struct SegmentFile {
/// Name of the file.
pub name: String,
/// Bytes associated with `name`.
pub data: Vec<u8>,
}
/// Byte-oriented sink with helpers for writing primitives and encoded values.
///
/// Implementors provide [`DataOutput::write_byte`] and
/// [`DataOutput::write_bytes`]; every other method has a default built on
/// those two, either directly (fixed-width integers) or by handing a
/// [`DataOutputWriter`] adapter to the matching `crate::encoding` routine.
pub trait DataOutput {
    /// Writes a single byte.
    fn write_byte(&mut self, b: u8) -> io::Result<()>;

    /// Writes the bytes of `buf`.
    fn write_bytes(&mut self, buf: &[u8]) -> io::Result<()>;

    /// Writes `i` as four bytes in little-endian order.
    fn write_le_int(&mut self, i: i32) -> io::Result<()> {
        let encoded = i.to_le_bytes();
        self.write_bytes(&encoded)
    }

    /// Writes `i` as two bytes in little-endian order.
    fn write_le_short(&mut self, i: i16) -> io::Result<()> {
        let encoded = i.to_le_bytes();
        self.write_bytes(&encoded)
    }

    /// Writes `i` as eight bytes in little-endian order.
    fn write_le_long(&mut self, i: i64) -> io::Result<()> {
        let encoded = i.to_le_bytes();
        self.write_bytes(&encoded)
    }

    /// Writes `i` using `crate::encoding::varint::write_vint`.
    fn write_vint(&mut self, i: i32) -> io::Result<()> {
        let mut sink = DataOutputWriter(self);
        crate::encoding::varint::write_vint(&mut sink, i)
    }

    /// Writes `i` using `crate::encoding::varint::write_vlong`.
    fn write_vlong(&mut self, i: i64) -> io::Result<()> {
        let mut sink = DataOutputWriter(self);
        crate::encoding::varint::write_vlong(&mut sink, i)
    }

    /// Writes `i` using `crate::encoding::varint::write_zint`.
    fn write_zint(&mut self, i: i32) -> io::Result<()> {
        let mut sink = DataOutputWriter(self);
        crate::encoding::varint::write_zint(&mut sink, i)
    }

    /// Writes `i` using `crate::encoding::varint::write_zlong`.
    fn write_zlong(&mut self, i: i64) -> io::Result<()> {
        let mut sink = DataOutputWriter(self);
        crate::encoding::varint::write_zlong(&mut sink, i)
    }

    /// Writes `i` using `crate::encoding::varint::write_signed_vlong`.
    fn write_signed_vlong(&mut self, i: i64) -> io::Result<()> {
        let mut sink = DataOutputWriter(self);
        crate::encoding::varint::write_signed_vlong(&mut sink, i)
    }

    /// Writes `i` as four bytes in big-endian order.
    fn write_be_int(&mut self, i: i32) -> io::Result<()> {
        let encoded = i.to_be_bytes();
        self.write_bytes(&encoded)
    }

    /// Writes `i` as eight bytes in big-endian order.
    fn write_be_long(&mut self, i: i64) -> io::Result<()> {
        let encoded = i.to_be_bytes();
        self.write_bytes(&encoded)
    }

    /// Writes `s` using `crate::encoding::string::write_string`.
    fn write_string(&mut self, s: &str) -> io::Result<()> {
        let mut sink = DataOutputWriter(self);
        crate::encoding::string::write_string(&mut sink, s)
    }

    /// Writes `set` using `crate::encoding::string::write_set_of_strings`.
    fn write_set_of_strings(&mut self, set: &[String]) -> io::Result<()> {
        let mut sink = DataOutputWriter(self);
        crate::encoding::string::write_set_of_strings(&mut sink, set)
    }

    /// Writes `map` using `crate::encoding::string::write_map_of_strings`.
    fn write_map_of_strings(&mut self, map: &HashMap<String, String>) -> io::Result<()> {
        let mut sink = DataOutputWriter(self);
        crate::encoding::string::write_map_of_strings(&mut sink, map)
    }

    /// Forwards `values` and `limit` to
    /// `crate::encoding::group_vint::write_group_vints`.
    fn write_group_vints(&mut self, values: &[i32], limit: usize) -> io::Result<()> {
        let mut sink = DataOutputWriter(self);
        crate::encoding::group_vint::write_group_vints(&mut sink, values, limit)
    }
}
/// Adapter that lets a [`DataOutput`] be used wherever an [`io::Write`] is
/// expected.
pub struct DataOutputWriter<'a, T: ?Sized>(pub &'a mut T);

impl<'a, T> io::Write for DataOutputWriter<'a, T>
where
    T: DataOutput + ?Sized,
{
    /// Hands the whole buffer to `write_bytes` and, on success, reports all
    /// of it as written.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.0.write_bytes(buf).map(|()| buf.len())
    }

    /// Does nothing; the adapter holds no buffered data of its own.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
/// [`DataOutput`] that appends everything written to a borrowed `Vec<u8>`.
pub struct VecOutput<'a>(pub &'a mut Vec<u8>);

impl<'a> DataOutput for VecOutput<'a> {
    /// Appends `b` to the vector; never fails.
    fn write_byte(&mut self, b: u8) -> io::Result<()> {
        let VecOutput(bytes) = self;
        bytes.push(b);
        Ok(())
    }

    /// Appends all of `buf` to the vector; never fails.
    fn write_bytes(&mut self, buf: &[u8]) -> io::Result<()> {
        let VecOutput(bytes) = self;
        bytes.extend(buf);
        Ok(())
    }
}
/// Byte-oriented source with helpers for reading primitives and encoded values.
///
/// Implementors provide [`DataInput::read_byte`] and
/// [`DataInput::read_bytes`]; every other method has a default built on those
/// two, either directly (fixed-width integers, skipping) or by handing a
/// [`DataInputReader`] adapter to the matching `crate::encoding` routine.
pub trait DataInput {
    /// Reads a single byte.
    fn read_byte(&mut self) -> io::Result<u8>;

    /// Fills `buf` from the input.
    fn read_bytes(&mut self, buf: &mut [u8]) -> io::Result<()>;

    /// Consumes `num_bytes` bytes by reading them into a scratch buffer of at
    /// most 1024 bytes at a time and discarding the contents.
    fn skip_bytes(&mut self, num_bytes: u64) -> io::Result<()> {
        const CHUNK: usize = 1024;
        let mut scratch = [0u8; CHUNK];
        let mut left = num_bytes;
        while left != 0 {
            // `step` never exceeds CHUNK, so the cast to usize is lossless.
            let step = left.min(CHUNK as u64);
            self.read_bytes(&mut scratch[..step as usize])?;
            left -= step;
        }
        Ok(())
    }

    /// Reads two bytes and interprets them as a little-endian `i16`.
    fn read_le_short(&mut self) -> io::Result<i16> {
        let mut raw = [0u8; 2];
        self.read_bytes(&mut raw)
            .map(move |()| i16::from_le_bytes(raw))
    }

    /// Reads four bytes and interprets them as a little-endian `i32`.
    fn read_le_int(&mut self) -> io::Result<i32> {
        let mut raw = [0u8; 4];
        self.read_bytes(&mut raw)
            .map(move |()| i32::from_le_bytes(raw))
    }

    /// Reads eight bytes and interprets them as a little-endian `i64`.
    fn read_le_long(&mut self) -> io::Result<i64> {
        let mut raw = [0u8; 8];
        self.read_bytes(&mut raw)
            .map(move |()| i64::from_le_bytes(raw))
    }

    /// Reads a value using `crate::encoding::varint::read_vint`.
    fn read_vint(&mut self) -> io::Result<i32> {
        let mut source = DataInputReader(self);
        crate::encoding::varint::read_vint(&mut source)
    }

    /// Reads a value using `crate::encoding::varint::read_vlong`.
    fn read_vlong(&mut self) -> io::Result<i64> {
        let mut source = DataInputReader(self);
        crate::encoding::varint::read_vlong(&mut source)
    }

    /// Reads a value using `crate::encoding::varint::read_zint`.
    fn read_zint(&mut self) -> io::Result<i32> {
        let mut source = DataInputReader(self);
        crate::encoding::varint::read_zint(&mut source)
    }

    /// Reads a value using `crate::encoding::varint::read_zlong`.
    fn read_zlong(&mut self) -> io::Result<i64> {
        let mut source = DataInputReader(self);
        crate::encoding::varint::read_zlong(&mut source)
    }

    /// Reads four bytes and interprets them as a big-endian `i32`.
    fn read_be_int(&mut self) -> io::Result<i32> {
        let mut raw = [0u8; 4];
        self.read_bytes(&mut raw)
            .map(move |()| i32::from_be_bytes(raw))
    }

    /// Reads eight bytes and interprets them as a big-endian `i64`.
    fn read_be_long(&mut self) -> io::Result<i64> {
        let mut raw = [0u8; 8];
        self.read_bytes(&mut raw)
            .map(move |()| i64::from_be_bytes(raw))
    }

    /// Reads a string using `crate::encoding::string::read_string`.
    fn read_string(&mut self) -> io::Result<String> {
        let mut source = DataInputReader(self);
        crate::encoding::string::read_string(&mut source)
    }

    /// Reads strings using `crate::encoding::string::read_set_of_strings`.
    fn read_set_of_strings(&mut self) -> io::Result<Vec<String>> {
        let mut source = DataInputReader(self);
        crate::encoding::string::read_set_of_strings(&mut source)
    }

    /// Reads a map using `crate::encoding::string::read_map_of_strings`.
    fn read_map_of_strings(&mut self) -> io::Result<HashMap<String, String>> {
        let mut source = DataInputReader(self);
        crate::encoding::string::read_map_of_strings(&mut source)
    }

    /// Forwards `values` and `limit` to
    /// `crate::encoding::group_vint::read_group_vints`.
    fn read_group_vints(&mut self, values: &mut [i32], limit: usize) -> io::Result<()> {
        let mut source = DataInputReader(self);
        crate::encoding::group_vint::read_group_vints(&mut source, values, limit)
    }
}
/// Adapter that lets a [`DataInput`] be used wherever an [`io::Read`] is
/// expected.
pub struct DataInputReader<'a, T: ?Sized>(pub &'a mut T);

impl<'a, T> io::Read for DataInputReader<'a, T>
where
    T: DataInput + ?Sized,
{
    /// Reads at most one byte per call.
    ///
    /// Returns `Ok(0)` only for an empty `buf`; any error from `read_byte`
    /// is returned unchanged.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match buf.first_mut() {
            None => Ok(0),
            Some(slot) => {
                *slot = self.0.read_byte()?;
                Ok(1)
            }
        }
    }

    /// Fills `buf` with a single `read_bytes` call instead of looping over
    /// one-byte `read`s.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        self.0.read_bytes(buf)
    }
}
/// Reads an `i32` from `reader` by forwarding to
/// `crate::encoding::varint::read_vint`.
///
/// # Errors
/// Returns whatever error the underlying decoder reports.
pub fn read_vint(reader: &mut impl io::Read) -> io::Result<i32> {
crate::encoding::varint::read_vint(reader)
}
/// Writes `val` to `writer` by forwarding to
/// `crate::encoding::varint::write_vint`.
///
/// # Errors
/// Returns whatever error the underlying encoder reports.
pub fn encode_vint(writer: &mut impl io::Write, val: i32) -> io::Result<()> {
crate::encoding::varint::write_vint(writer, val)
}
/// A [`DataInput`] that is `Send` and additionally exposes a name, a position,
/// seeking, slicing and a random-access view.
///
/// NOTE(review): the method descriptions below are inferred from the
/// signatures only; the exact contracts are defined by the implementors,
/// which are not visible here.
pub trait IndexInput: DataInput + Send {
/// Returns the name of this input.
fn name(&self) -> &str;
/// Returns the current position — presumably a byte offset from the start; confirm.
fn file_pointer(&self) -> u64;
/// Moves the position to `pos`.
fn seek(&mut self, pos: u64) -> io::Result<()>;
/// Returns the length of this input — presumably in bytes; confirm.
fn length(&self) -> u64;
/// Creates a new boxed input described by `description`, `offset` and `length`.
/// Presumably a view over `length` bytes starting at `offset` — confirm.
fn slice(&self, description: &str, offset: u64, length: u64)
-> io::Result<Box<dyn IndexInput>>;
/// Creates a boxed [`RandomAccessInput`] for this input.
fn random_access(&self) -> io::Result<Box<dyn RandomAccessInput>>;
}
/// Positional reads that take an explicit `pos` and only need `&self`.
///
/// NOTE(review): the `le` in the method names suggests little-endian decoding,
/// matching the `read_le_*` helpers on `DataInput` — confirm against implementors.
pub trait RandomAccessInput: Send {
/// Reads a `u8` at `pos`.
fn read_byte_at(&self, pos: u64) -> io::Result<u8>;
/// Reads an `i16` at `pos`.
fn read_le_short_at(&self, pos: u64) -> io::Result<i16>;
/// Reads an `i32` at `pos`.
fn read_le_int_at(&self, pos: u64) -> io::Result<i32>;
/// Reads an `i64` at `pos`.
fn read_le_long_at(&self, pos: u64) -> io::Result<i64>;
}
/// A [`DataOutput`] that is `Send` and additionally exposes a name, a
/// position and a checksum.
///
/// Implementors supply the three accessors; zero-padding to an alignment
/// boundary is provided by [`IndexOutput::align_file_pointer`].
pub trait IndexOutput: DataOutput + Send {
    /// Returns the name of this output.
    fn name(&self) -> &str;

    /// Returns the current position; `align_file_pointer` uses it as the
    /// offset to align.
    fn file_pointer(&self) -> u64;

    /// Returns a checksum for this output.
    /// NOTE(review): presumably covers the bytes written so far — confirm
    /// against implementors.
    fn checksum(&self) -> u64;

    /// Writes zero bytes until the position reported by `file_pointer` at the
    /// time of the call would reach the next multiple of `alignment`, and
    /// returns that aligned offset.
    ///
    /// Nothing is written when the position is already aligned.
    ///
    /// # Errors
    /// Returns [`io::ErrorKind::InvalidInput`] if `alignment` is zero or not a
    /// power of two, and propagates any error from `write_bytes`.
    fn align_file_pointer(&mut self, alignment: usize) -> io::Result<u64> {
        const ZEROS: [u8; 16] = [0u8; 16];

        // The mask arithmetic in `align_offset` is only meaningful for powers
        // of two; report bad input as an error instead of padding wrongly.
        if !alignment.is_power_of_two() {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("alignment must be a power of two, got {}", alignment),
            ));
        }

        let pos = self.file_pointer();
        let aligned = align_offset(pos, alignment);

        // Padding may exceed the zero buffer (e.g. 64-byte alignment), so emit
        // it in chunks rather than slicing past the end of `ZEROS`.
        let mut remaining = aligned - pos;
        while remaining > 0 {
            let chunk = remaining.min(ZEROS.len() as u64) as usize;
            self.write_bytes(&ZEROS[..chunk])?;
            remaining -= chunk as u64;
        }
        Ok(aligned)
    }
}
/// Rounds `offset` up to the next multiple of `alignment`.
///
/// An `offset` that is already a multiple of `alignment` is returned
/// unchanged.
///
/// # Panics
/// Panics if `alignment` is zero or not a power of two (the mask arithmetic
/// is only valid for powers of two), or if the aligned value does not fit in
/// a `u64`.
pub(crate) fn align_offset(offset: u64, alignment: usize) -> u64 {
    assert!(
        alignment.is_power_of_two(),
        "alignment must be a power of two, got {}",
        alignment
    );
    let mask = alignment as u64 - 1;
    // Add the mask (not the alignment) so that offsets whose aligned value
    // still fits in u64 never trip an intermediate overflow.
    offset
        .checked_add(mask)
        .expect("aligned offset overflows u64")
        & !mask
}
/// A boxed [`Directory`] trait object guarded by a [`std::sync::Mutex`].
pub type SharedDirectory = std::sync::Mutex<Box<dyn Directory>>;
/// A `Send` collection of named files that can be created, opened, listed,
/// measured, deleted, renamed and read in full.
///
/// NOTE(review): descriptions of the required methods are inferred from their
/// signatures; the exact contracts live in the implementors.
pub trait Directory: Send {
    /// Creates an output for the file called `name`.
    fn create_output(&mut self, name: &str) -> io::Result<Box<dyn IndexOutput>>;

    /// Opens an input for the file called `name`.
    fn open_input(&self, name: &str) -> io::Result<Box<dyn IndexInput>>;

    /// Returns the names of the files in this directory.
    fn list_all(&self) -> io::Result<Vec<String>>;

    /// Returns the length of the file called `name`.
    fn file_length(&self, name: &str) -> io::Result<u64>;

    /// Deletes the file called `name`.
    fn delete_file(&mut self, name: &str) -> io::Result<()>;

    /// Renames `source` to `dest`.
    fn rename(&mut self, source: &str, dest: &str) -> io::Result<()>;

    /// Returns the contents of the file called `name` as a byte vector.
    fn read_file(&self, name: &str) -> io::Result<Vec<u8>>;

    /// Creates an output for `name`, writes all of `data` to it and then
    /// drops the output.
    fn write_file(&mut self, name: &str, data: &[u8]) -> io::Result<()> {
        self.create_output(name)?.write_bytes(data)
    }

    /// Default implementation does nothing and succeeds; `names` is ignored.
    fn sync(&self, names: &[&str]) -> io::Result<()> {
        // Explicitly discard the argument so the default body stays warning-free.
        let _ = names;
        Ok(())
    }

    /// Default implementation does nothing and succeeds.
    fn sync_meta_data(&self) -> io::Result<()> {
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal in-memory [`DataInput`] over a borrowed slice, used to read
    /// back what the tests wrote through [`VecOutput`].
    struct ByteSliceInput<'a> {
        data: &'a [u8],
        pos: usize,
    }

    impl<'a> ByteSliceInput<'a> {
        fn new(data: &'a [u8]) -> Self {
            Self { data, pos: 0 }
        }
    }

    impl DataInput for ByteSliceInput<'_> {
        fn read_byte(&mut self) -> io::Result<u8> {
            if self.pos >= self.data.len() {
                return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "end of input"));
            }
            let b = self.data[self.pos];
            self.pos += 1;
            Ok(b)
        }

        fn read_bytes(&mut self, buf: &mut [u8]) -> io::Result<()> {
            let end = self.pos + buf.len();
            if end > self.data.len() {
                return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "end of input"));
            }
            buf.copy_from_slice(&self.data[self.pos..end]);
            self.pos = end;
            Ok(())
        }
    }

    #[test]
    fn test_write_le_int() {
        let mut buf = Vec::new();
        VecOutput(&mut buf).write_le_int(0x04030201_i32).unwrap();
        assert_eq!(buf, [0x01, 0x02, 0x03, 0x04]);
    }

    #[test]
    fn test_write_le_long() {
        let mut buf = Vec::new();
        VecOutput(&mut buf)
            .write_le_long(0x0807060504030201_i64)
            .unwrap();
        assert_eq!(buf, [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]);
    }

    #[test]
    fn test_write_le_short() {
        let mut buf = Vec::new();
        VecOutput(&mut buf).write_le_short(0x0201_i16).unwrap();
        assert_eq!(buf, [0x01, 0x02]);
    }

    #[test]
    fn test_write_be_int() {
        let mut buf = Vec::new();
        VecOutput(&mut buf).write_be_int(0x04030201_i32).unwrap();
        assert_eq!(buf, [0x04, 0x03, 0x02, 0x01]);
    }

    #[test]
    fn test_write_be_long() {
        let mut buf = Vec::new();
        VecOutput(&mut buf)
            .write_be_long(0x0807060504030201_i64)
            .unwrap();
        assert_eq!(buf, [0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01]);
    }

    #[test]
    fn test_align_offset() {
        assert_eq!(align_offset(0, 8), 0);
        assert_eq!(align_offset(1, 8), 8);
        assert_eq!(align_offset(7, 8), 8);
        assert_eq!(align_offset(8, 8), 8);
        assert_eq!(align_offset(9, 8), 16);
    }

    #[test]
    fn test_roundtrip_le_short() {
        for &val in &[0_i16, 1, -1, 0x0201, i16::MIN, i16::MAX] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_le_short(val).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_le_short().unwrap();
            assert_eq!(decoded, val);
        }
    }

    #[test]
    fn test_roundtrip_le_int() {
        for &val in &[0_i32, 1, -1, 0x04030201, i32::MIN, i32::MAX] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_le_int(val).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_le_int().unwrap();
            assert_eq!(decoded, val);
        }
    }

    #[test]
    fn test_roundtrip_le_long() {
        for &val in &[0_i64, 1, -1, 0x0807060504030201, i64::MIN, i64::MAX] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_le_long(val).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_le_long().unwrap();
            assert_eq!(decoded, val);
        }
    }

    #[test]
    fn test_roundtrip_be_int() {
        for &val in &[0_i32, 1, -1, 0x04030201, i32::MIN, i32::MAX] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_be_int(val).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_be_int().unwrap();
            assert_eq!(decoded, val);
        }
    }

    #[test]
    fn test_roundtrip_be_long() {
        for &val in &[0_i64, 1, -1, 0x0807060504030201, i64::MIN, i64::MAX] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_be_long(val).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_be_long().unwrap();
            assert_eq!(decoded, val);
        }
    }

    #[test]
    fn test_roundtrip_vint() {
        for &val in &[0, 1, 127, 128, 16383, 16384, i32::MAX, -1] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_vint(val).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_vint().unwrap();
            assert_eq!(decoded, val);
        }
    }

    #[test]
    fn test_roundtrip_vlong() {
        for &val in &[0_i64, 1, 127, 128, 16384, i64::MAX] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_vlong(val).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_vlong().unwrap();
            assert_eq!(decoded, val);
        }
    }

    #[test]
    fn test_roundtrip_zint() {
        for &val in &[0, 1, -1, 127, -128, i32::MIN, i32::MAX] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_zint(val).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_zint().unwrap();
            assert_eq!(decoded, val);
        }
    }

    #[test]
    fn test_roundtrip_zlong() {
        for &val in &[0_i64, 1, -1, 127, -128, i64::MIN, i64::MAX] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_zlong(val).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_zlong().unwrap();
            assert_eq!(decoded, val);
        }
    }

    #[test]
    fn test_roundtrip_string() {
        for s in &["", "hello", "hello world", "\u{00e9}\u{00e8}"] {
            let mut buf = Vec::new();
            VecOutput(&mut buf).write_string(s).unwrap();
            let decoded = ByteSliceInput::new(&buf).read_string().unwrap();
            assert_eq!(&decoded, s);
        }
    }

    #[test]
    fn test_roundtrip_set_of_strings() {
        let set = vec!["hello".to_string(), "world".to_string()];
        let mut buf = Vec::new();
        VecOutput(&mut buf).write_set_of_strings(&set).unwrap();
        let decoded = ByteSliceInput::new(&buf).read_set_of_strings().unwrap();
        assert_eq!(decoded, set);
    }

    #[test]
    fn test_roundtrip_map_of_strings() {
        let mut map = HashMap::new();
        map.insert("k1".to_string(), "v1".to_string());
        map.insert("k2".to_string(), "v2".to_string());
        let mut buf = Vec::new();
        VecOutput(&mut buf).write_map_of_strings(&map).unwrap();
        let decoded = ByteSliceInput::new(&buf).read_map_of_strings().unwrap();
        assert_eq!(decoded, map);
    }

    #[test]
    fn test_roundtrip_group_vints() {
        let values = [1, 256, 3, 4, 5, 6];
        let mut buf = Vec::new();
        VecOutput(&mut buf).write_group_vints(&values, 6).unwrap();
        let mut decoded = [0i32; 6];
        ByteSliceInput::new(&buf)
            .read_group_vints(&mut decoded, 6)
            .unwrap();
        assert_eq!(decoded, values);
    }

    #[test]
    fn test_skip_bytes() {
        let data = [1u8, 2, 3, 4, 5];
        let mut input = ByteSliceInput::new(&data);
        input.skip_bytes(3).unwrap();
        assert_eq!(input.read_byte().unwrap(), 4);
    }

    #[test]
    fn test_skip_bytes_zero_is_noop() {
        let data = [9u8, 8];
        let mut input = ByteSliceInput::new(&data);
        input.skip_bytes(0).unwrap();
        assert_eq!(input.read_byte().unwrap(), 9);
    }

    #[test]
    fn test_skip_bytes_spans_multiple_chunks() {
        // Longer than the 1024-byte scratch buffer used by the default
        // `skip_bytes`, so the loop has to run more than once.
        let data: Vec<u8> = (0..3000usize).map(|i| (i % 251) as u8).collect();
        let mut input = ByteSliceInput::new(&data);
        input.skip_bytes(2500).unwrap();
        assert_eq!(input.read_byte().unwrap(), data[2500]);
    }

    #[test]
    fn test_skip_bytes_past_eof() {
        let data = [1u8, 2, 3];
        let mut input = ByteSliceInput::new(&data);
        let err = input.skip_bytes(4).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_read_byte_eof() {
        let data = [];
        let mut input = ByteSliceInput::new(&data);
        assert_err!(input.read_byte());
    }

    #[test]
    fn test_read_le_int_truncated() {
        let data = [1u8, 2, 3];
        let err = ByteSliceInput::new(&data).read_le_int().unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_vec_output_write_byte() {
        let mut buf = Vec::new();
        {
            let mut out = VecOutput(&mut buf);
            out.write_byte(0xAB).unwrap();
            out.write_byte(0xCD).unwrap();
        }
        assert_eq!(buf, [0xAB, 0xCD]);
    }

    #[test]
    fn test_data_output_writer_reports_full_length() {
        let mut buf = Vec::new();
        {
            let mut out = VecOutput(&mut buf);
            let mut writer = DataOutputWriter(&mut out);
            assert_eq!(io::Write::write(&mut writer, b"abc").unwrap(), 3);
            io::Write::flush(&mut writer).unwrap();
        }
        assert_eq!(buf, b"abc");
    }

    #[test]
    fn test_data_input_reader_read_and_read_exact() {
        let data = [7u8, 8, 9];
        let mut input = ByteSliceInput::new(&data);
        let mut reader = DataInputReader(&mut input);

        // An empty destination reads nothing and consumes nothing.
        let mut empty = [0u8; 0];
        assert_eq!(io::Read::read(&mut reader, &mut empty).unwrap(), 0);

        // `read` yields a single byte even when more room is available.
        let mut buf = [0u8; 3];
        assert_eq!(io::Read::read(&mut reader, &mut buf).unwrap(), 1);
        assert_eq!(buf, [7, 0, 0]);

        io::Read::read_exact(&mut reader, &mut buf[1..]).unwrap();
        assert_eq!(buf, [7, 8, 9]);

        // Past the end the underlying error is surfaced as-is.
        let err = io::Read::read(&mut reader, &mut buf).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_mixed_primitives() {
        let mut buf = Vec::new();
        let mut out = VecOutput(&mut buf);
        out.write_be_int(0x12345678).unwrap();
        out.write_vint(42).unwrap();
        out.write_string("test").unwrap();
        out.write_le_long(0xDEADBEEF).unwrap();
        let mut input = ByteSliceInput::new(&buf);
        assert_eq!(input.read_be_int().unwrap(), 0x12345678);
        assert_eq!(input.read_vint().unwrap(), 42);
        assert_eq!(input.read_string().unwrap(), "test");
        assert_eq!(input.read_le_long().unwrap(), 0xDEADBEEF);
    }
}