ben/decode/
read.rs

//! Single-sample extraction from BEN and XBEN streams.
//!
//! This module provides functions for extracting a single assignment
//! vector, selected by its 1-based sample number, from a BEN or XBEN file.
5use serde_json::Error as SerdeError;
6use std::fmt::{self};
7use std::io::Cursor;
8use std::io::{self, Read};
9
10use super::{decode_ben32_line, decode_ben_line, rle_to_vec, BenDecoder, XBenDecoder};
11
12/// Types of errors that can occur during the extraction of assignments.
13#[derive(Debug)]
14pub enum SampleErrorKind {
15    /// Indicates the sample number is invalid. All sample numbers must be greater than 0.
16    InvalidSampleNumber,
17    /// Indicates the sample number was not found in the file. The last sample number is provided.
18    SampleNotFound { sample_number: usize },
19    /// Wrapper for IO errors.
20    IoError(io::Error),
21    /// Wrapper for JSON errors.
22    JsonError(SerdeError),
23}
24
25/// Error type for the extraction of assignments.
26#[derive(Debug)]
27pub struct SampleError {
28    pub kind: SampleErrorKind,
29}
30
31impl SampleError {
32    /// Create a new error from an IO error.
33    ///
34    /// # Arguments
35    ///
36    /// * `error` - The IO error to wrap.
37    pub fn new_io_error(error: io::Error) -> Self {
38        SampleError {
39            kind: SampleErrorKind::IoError(error),
40        }
41    }
42}
43
44impl fmt::Display for SampleError {
45    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
46        match &self.kind {
47            SampleErrorKind::InvalidSampleNumber => {
48                write!(
49                    f,
50                    "Invalid sample number. Sample number must be greater than 0"
51                )
52            }
53            SampleErrorKind::SampleNotFound { sample_number } => {
54                write!(
55                    f,
56                    "Sample number not found in file. \
57                    Failed to find sample '{}'. \
58                    Last sample seems to be '{}'",
59                    sample_number,
60                    sample_number - 1
61                )
62            }
63            SampleErrorKind::IoError(e) => {
64                write!(f, "IO Error: {}", e)
65            }
66            SampleErrorKind::JsonError(e) => {
67                write!(f, "JSON Error: {}", e)
68            }
69        }
70    }
71}
72
73impl std::error::Error for SampleError {
74    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
75        match &self.kind {
76            SampleErrorKind::InvalidSampleNumber => None,
77            SampleErrorKind::SampleNotFound { .. } => None,
78            SampleErrorKind::IoError(e) => Some(e),
79            SampleErrorKind::JsonError(e) => Some(e),
80        }
81    }
82}
83
84impl From<io::Error> for SampleError {
85    fn from(error: io::Error) -> Self {
86        SampleError::new_io_error(error)
87    }
88}
89
90impl From<SerdeError> for SampleError {
91    fn from(error: SerdeError) -> Self {
92        SampleError {
93            kind: SampleErrorKind::JsonError(error),
94        }
95    }
96}
97
98/// Extracts a single assignment from a binary-encoded data stream.
99///
100/// # Arguments
101///
102/// * `reader` - The reader to extract the assignment from.
103/// * `sample_number` - The sample number to extract.
104///
105/// # Returns
106///
107/// This function returns a `Result` containing a `Vec<u16>` of the assignment if successful,
108/// or a `SampleError` if an error occurred.
109///
110/// # Example
111///
112/// ```no_run
113/// use ben::decode::read::extract_assignment_ben;
114/// use std::{fs::File, io::BufReader};
115///
116/// let file = File::open("data.jsonl.ben").unwrap();
117/// let reader = BufReader::new(file);
118/// let sample_number = 2;
119///
120/// let result = extract_assignment_ben(reader, sample_number);
121/// match result {
122///     Ok(assignment) => {
123///         eprintln!("Extracted assignment: {:?}", assignment);
124///     }
125///     Err(e) => {
126///         eprintln!("Error: {}", e);
127///     }
128/// }
129/// ```
130///
131/// # Errors
132///
133/// This function can return a `SampleError` if an error occurs during the extraction process.
134/// The error can be one of the following:
135/// * `InvalidSampleNumber` - The sample number is invalid. All sample numbers must be greater than 0.
136/// * `SampleNotFound` - The sample number was not found in the file. The last sample number is provided.
137/// * `IoError` - An IO error occurred during the extraction process.
138/// * `JsonError` - A JSON error occurred during the extraction process.
139pub fn extract_assignment_ben<R: Read>(
140    mut reader: R,
141    sample_number: usize,
142) -> Result<Vec<u16>, SampleError> {
143    if sample_number == 0 {
144        return Err(SampleError {
145            kind: SampleErrorKind::InvalidSampleNumber,
146        });
147    }
148
149    let inner_decoder = BenDecoder::new(&mut reader).expect("Failed to create XBenDecoder");
150    let frame_iterator = inner_decoder.into_frames();
151
152    let mut current_sample = 1;
153    for frame in frame_iterator {
154        let frame = frame.map_err(SampleError::new_io_error)?;
155        if current_sample == sample_number || current_sample + frame.count as usize > sample_number
156        {
157            match decode_ben_line(
158                Cursor::new(&frame.raw_data),
159                frame.max_val_bits,
160                frame.max_len_bits,
161                frame.n_bytes,
162            ) {
163                Ok(assignment_rle) => return Ok(rle_to_vec(assignment_rle)),
164                Err(e) => return Err(SampleError::new_io_error(e)),
165            };
166        }
167        current_sample += frame.count as usize;
168    }
169
170    Err(SampleError {
171        kind: SampleErrorKind::SampleNotFound {
172            sample_number: current_sample,
173        },
174    })
175}
176
177/// Extracts a single assignment from a binary-encoded data stream.
178///
179/// # Arguments
180///
181/// * `reader` - The reader to extract the assignment from.
182/// * `sample_number` - The sample number to extract.
183///
184/// # Returns
185///
186/// This function returns a `Result` containing a `Vec<u16>` of the assignment if successful,
187/// or a `SampleError` if an error occurred.
188///
189/// # Example
190///
191/// ```no_run
192/// use ben::decode::read::extract_assignment_xben;
193/// use std::{fs::File, io::BufReader};
194///
195/// let file = File::open("data.jsonl.xben").unwrap();
196/// let reader = BufReader::new(file);
197/// let sample_number = 2;
198///
199/// let result = extract_assignment_xben(reader, sample_number);
200/// match result {
201///     Ok(assignment) => {
202///         eprintln!("Extracted assignment: {:?}", assignment);
203///     }
204///     Err(e) => {
205///         eprintln!("Error: {}", e);
206///     }
207/// }
208/// ```
209///
210/// # Errors
211///
212/// This function can return a `SampleError` if an error occurs during the extraction process.
213/// The error can be one of the following:
214/// * `InvalidSampleNumber` - The sample number is invalid. All sample numbers must be greater than 0.
215/// * `SampleNotFound` - The sample number was not found in the file. The last sample number is provided.
216/// * `IoError` - An IO error occurred during the extraction process.
217/// * `JsonError` - A JSON error occurred during the extraction process.
218pub fn extract_assignment_xben<R: Read>(
219    mut reader: R,
220    sample_number: usize,
221) -> Result<Vec<u16>, SampleError> {
222    if sample_number == 0 {
223        return Err(SampleError {
224            kind: SampleErrorKind::InvalidSampleNumber,
225        });
226    }
227
228    let inner_decoder = XBenDecoder::new(&mut reader).expect("Failed to create XBenDecoder");
229    let variant = inner_decoder.variant;
230    let frame_iterator = inner_decoder.into_frames();
231
232    let mut current_sample = 1;
233    for frame in frame_iterator {
234        let frame = frame.map_err(SampleError::new_io_error)?;
235        if current_sample == sample_number || current_sample + frame.1 as usize > sample_number {
236            match decode_ben32_line(Cursor::new(&frame.0), variant) {
237                Ok((assignment, _)) => return Ok(assignment),
238                Err(e) => return Err(SampleError::new_io_error(e)),
239            };
240        }
241        current_sample += frame.1 as usize;
242    }
243
244    Err(SampleError {
245        kind: SampleErrorKind::SampleNotFound {
246            sample_number: current_sample,
247        },
248    })
249}
250
251// #[cfg(test)]
252// mod tests {
253//     include!("tests/read_tests.rs");
254// }
255#[cfg(test)]
256#[path = "tests/read_tests.rs"]
257mod tests;