ben/decode/read.rs
//! Extraction of single samples from BEN and XBEN streams.
//!
//! This module provides functionality for extracting a single assignment
//! vector, identified by its 1-based sample number, from a BEN or XBEN file.
5use serde_json::Error as SerdeError;
6use std::fmt::{self};
7use std::io::Cursor;
8use std::io::{self, Read};
9
10use super::{decode_ben32_line, decode_ben_line, rle_to_vec, BenDecoder, XBenDecoder};
11
/// Types of errors that can occur during the extraction of assignments.
#[derive(Debug)]
pub enum SampleErrorKind {
    /// Indicates the sample number is invalid. All sample numbers must be greater than 0.
    InvalidSampleNumber,
    /// Indicates the requested sample was not found in the file.
    ///
    /// The extraction functions in this module store their running sample counter
    /// here, i.e. one more than the number of samples they counted before the
    /// stream ended. The `Display` implementation of `SampleError` therefore reports
    /// one less than `sample_number` as the apparent last sample.
    SampleNotFound { sample_number: usize },
    /// Wrapper for IO errors.
    IoError(io::Error),
    /// Wrapper for JSON errors.
    JsonError(SerdeError),
}
24
/// Error type for the extraction of assignments.
///
/// This is a thin wrapper around [`SampleErrorKind`]; inspect `kind` to find out
/// which failure occurred.
#[derive(Debug)]
pub struct SampleError {
    /// The specific failure that occurred.
    pub kind: SampleErrorKind,
}
30
31impl SampleError {
32 /// Create a new error from an IO error.
33 ///
34 /// # Arguments
35 ///
36 /// * `error` - The IO error to wrap.
37 pub fn new_io_error(error: io::Error) -> Self {
38 SampleError {
39 kind: SampleErrorKind::IoError(error),
40 }
41 }
42}
43
44impl fmt::Display for SampleError {
45 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
46 match &self.kind {
47 SampleErrorKind::InvalidSampleNumber => {
48 write!(
49 f,
50 "Invalid sample number. Sample number must be greater than 0"
51 )
52 }
53 SampleErrorKind::SampleNotFound { sample_number } => {
54 write!(
55 f,
56 "Sample number not found in file. \
57 Failed to find sample '{}'. \
58 Last sample seems to be '{}'",
59 sample_number,
60 sample_number - 1
61 )
62 }
63 SampleErrorKind::IoError(e) => {
64 write!(f, "IO Error: {}", e)
65 }
66 SampleErrorKind::JsonError(e) => {
67 write!(f, "JSON Error: {}", e)
68 }
69 }
70 }
71}
72
73impl std::error::Error for SampleError {
74 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
75 match &self.kind {
76 SampleErrorKind::InvalidSampleNumber => None,
77 SampleErrorKind::SampleNotFound { .. } => None,
78 SampleErrorKind::IoError(e) => Some(e),
79 SampleErrorKind::JsonError(e) => Some(e),
80 }
81 }
82}
83
84impl From<io::Error> for SampleError {
85 fn from(error: io::Error) -> Self {
86 SampleError::new_io_error(error)
87 }
88}
89
90impl From<SerdeError> for SampleError {
91 fn from(error: SerdeError) -> Self {
92 SampleError {
93 kind: SampleErrorKind::JsonError(error),
94 }
95 }
96}
97
98/// Extracts a single assignment from a binary-encoded data stream.
99///
100/// # Arguments
101///
102/// * `reader` - The reader to extract the assignment from.
103/// * `sample_number` - The sample number to extract.
104///
105/// # Returns
106///
107/// This function returns a `Result` containing a `Vec<u16>` of the assignment if successful,
108/// or a `SampleError` if an error occurred.
109///
110/// # Example
111///
112/// ```no_run
113/// use ben::decode::read::extract_assignment_ben;
114/// use std::{fs::File, io::BufReader};
115///
116/// let file = File::open("data.jsonl.ben").unwrap();
117/// let reader = BufReader::new(file);
118/// let sample_number = 2;
119///
120/// let result = extract_assignment_ben(reader, sample_number);
121/// match result {
122/// Ok(assignment) => {
123/// eprintln!("Extracted assignment: {:?}", assignment);
124/// }
125/// Err(e) => {
126/// eprintln!("Error: {}", e);
127/// }
128/// }
129/// ```
130///
131/// # Errors
132///
133/// This function can return a `SampleError` if an error occurs during the extraction process.
134/// The error can be one of the following:
135/// * `InvalidSampleNumber` - The sample number is invalid. All sample numbers must be greater than 0.
136/// * `SampleNotFound` - The sample number was not found in the file. The last sample number is provided.
137/// * `IoError` - An IO error occurred during the extraction process.
138/// * `JsonError` - A JSON error occurred during the extraction process.
139pub fn extract_assignment_ben<R: Read>(
140 mut reader: R,
141 sample_number: usize,
142) -> Result<Vec<u16>, SampleError> {
143 if sample_number == 0 {
144 return Err(SampleError {
145 kind: SampleErrorKind::InvalidSampleNumber,
146 });
147 }
148
149 let inner_decoder = BenDecoder::new(&mut reader).expect("Failed to create XBenDecoder");
150 let frame_iterator = inner_decoder.into_frames();
151
152 let mut current_sample = 1;
153 for frame in frame_iterator {
154 let frame = frame.map_err(SampleError::new_io_error)?;
155 if current_sample == sample_number || current_sample + frame.count as usize > sample_number
156 {
157 match decode_ben_line(
158 Cursor::new(&frame.raw_data),
159 frame.max_val_bits,
160 frame.max_len_bits,
161 frame.n_bytes,
162 ) {
163 Ok(assignment_rle) => return Ok(rle_to_vec(assignment_rle)),
164 Err(e) => return Err(SampleError::new_io_error(e)),
165 };
166 }
167 current_sample += frame.count as usize;
168 }
169
170 Err(SampleError {
171 kind: SampleErrorKind::SampleNotFound {
172 sample_number: current_sample,
173 },
174 })
175}
176
177/// Extracts a single assignment from a binary-encoded data stream.
178///
179/// # Arguments
180///
181/// * `reader` - The reader to extract the assignment from.
182/// * `sample_number` - The sample number to extract.
183///
184/// # Returns
185///
186/// This function returns a `Result` containing a `Vec<u16>` of the assignment if successful,
187/// or a `SampleError` if an error occurred.
188///
189/// # Example
190///
191/// ```no_run
192/// use ben::decode::read::extract_assignment_xben;
193/// use std::{fs::File, io::BufReader};
194///
195/// let file = File::open("data.jsonl.xben").unwrap();
196/// let reader = BufReader::new(file);
197/// let sample_number = 2;
198///
199/// let result = extract_assignment_xben(reader, sample_number);
200/// match result {
201/// Ok(assignment) => {
202/// eprintln!("Extracted assignment: {:?}", assignment);
203/// }
204/// Err(e) => {
205/// eprintln!("Error: {}", e);
206/// }
207/// }
208/// ```
209///
210/// # Errors
211///
212/// This function can return a `SampleError` if an error occurs during the extraction process.
213/// The error can be one of the following:
214/// * `InvalidSampleNumber` - The sample number is invalid. All sample numbers must be greater than 0.
215/// * `SampleNotFound` - The sample number was not found in the file. The last sample number is provided.
216/// * `IoError` - An IO error occurred during the extraction process.
217/// * `JsonError` - A JSON error occurred during the extraction process.
218pub fn extract_assignment_xben<R: Read>(
219 mut reader: R,
220 sample_number: usize,
221) -> Result<Vec<u16>, SampleError> {
222 if sample_number == 0 {
223 return Err(SampleError {
224 kind: SampleErrorKind::InvalidSampleNumber,
225 });
226 }
227
228 let inner_decoder = XBenDecoder::new(&mut reader).expect("Failed to create XBenDecoder");
229 let variant = inner_decoder.variant;
230 let frame_iterator = inner_decoder.into_frames();
231
232 let mut current_sample = 1;
233 for frame in frame_iterator {
234 let frame = frame.map_err(SampleError::new_io_error)?;
235 if current_sample == sample_number || current_sample + frame.1 as usize > sample_number {
236 match decode_ben32_line(Cursor::new(&frame.0), variant) {
237 Ok((assignment, _)) => return Ok(assignment),
238 Err(e) => return Err(SampleError::new_io_error(e)),
239 };
240 }
241 current_sample += frame.1 as usize;
242 }
243
244 Err(SampleError {
245 kind: SampleErrorKind::SampleNotFound {
246 sample_number: current_sample,
247 },
248 })
249}
250
// Unit tests for this module live in `tests/read_tests.rs` and are compiled only
// for test builds. `#[path]` attaches that file as the `tests` child module; an
// earlier revision pulled the same file in with `include!` inside an inline module.
#[cfg(test)]
#[path = "tests/read_tests.rs"]
mod tests;