1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
use std::io::Read;
use fallible_streaming_iterator::FallibleStreamingIterator;
use crate::error::{Error, Result};
use super::super::CompressedBlock;
use super::util;
fn read_size<R: Read>(reader: &mut R) -> Result<(usize, usize)> {
let rows = match util::zigzag_i64(reader) {
Ok(a) => a,
Err(Error::Io(io_err)) => {
if let std::io::ErrorKind::UnexpectedEof = io_err.kind() {
return Ok((0, 0));
} else {
return Err(Error::Io(io_err));
}
}
Err(other) => return Err(other),
};
let bytes = util::zigzag_i64(reader)?;
Ok((rows as usize, bytes as usize))
}
/// Reads the next block from `reader` into `block`, reusing `block.data` as the buffer.
///
/// The block header is read with `read_size`. `block.number_of_rows` is always
/// updated; when it is `0` the function returns immediately and leaves
/// `block.data` untouched. Otherwise `block.data` is replaced by the block's bytes
/// and the 16 bytes that follow are compared against `file_marker`.
///
/// # Errors
///
/// * Any error returned by `read_size`.
/// * An I/O error (kind `UnexpectedEof`) if the reader ends before the declared
///   number of bytes, or the 16-byte marker, could be read.
/// * `Error::ExternalFormat` if the marker read from the block differs from
///   `file_marker`.
fn read_block<R: Read>(
    reader: &mut R,
    block: &mut CompressedBlock,
    file_marker: [u8; 16],
) -> Result<()> {
    let (rows, bytes) = read_size(reader)?;
    block.number_of_rows = rows;
    if rows == 0 {
        return Ok(());
    }

    // The declared size comes from the input and cannot be trusted: instead of
    // allocating `bytes` up front, read through a length-limited adaptor so the
    // buffer only grows as far as data is actually available.
    block.data.clear();
    let read = reader
        .by_ref()
        .take(bytes as u64)
        .read_to_end(&mut block.data)?;
    if read != bytes {
        // Same error kind that `read_exact` reports for a truncated input.
        return Err(std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            "failed to fill whole buffer",
        )
        .into());
    }

    let mut marker = [0u8; 16];
    reader.read_exact(&mut marker)?;
    if marker != file_marker {
        return Err(Error::ExternalFormat(
            "Avro: the sync marker in the block does not correspond to the file marker".to_string(),
        ));
    }
    Ok(())
}
/// Streaming iterator over the blocks read from a reader.
///
/// A single [`CompressedBlock`] is kept as a buffer and overwritten on every
/// advance, so the item handed out is a borrow of that buffer.
pub struct BlockStreamIterator<R: Read> {
// Buffer holding the most recently read block; reused across advances.
buf: CompressedBlock,
// Source the blocks are read from.
reader: R,
// 16-byte marker that each block's trailing marker is compared against.
file_marker: [u8; 16],
}
impl<R: Read> BlockStreamIterator<R> {
pub fn new(reader: R, file_marker: [u8; 16]) -> Self {
Self {
reader,
file_marker,
buf: CompressedBlock::new(0, vec![]),
}
}
pub fn buffer(&mut self) -> &mut CompressedBlock {
&mut self.buf
}
pub fn into_inner(self) -> (R, Vec<u8>) {
(self.reader, self.buf.data)
}
}
impl<R: Read> FallibleStreamingIterator for BlockStreamIterator<R> {
    type Error = Error;
    type Item = CompressedBlock;

    /// Reads the next block from the reader into the internal buffer.
    fn advance(&mut self) -> Result<()> {
        let marker = self.file_marker;
        read_block(&mut self.reader, &mut self.buf, marker)
    }

    /// Returns the buffered block, or `None` when it holds zero rows.
    fn get(&self) -> Option<&Self::Item> {
        match self.buf.number_of_rows {
            0 => None,
            _ => Some(&self.buf),
        }
    }
}