batch_mode_batch_schema/
load_batch_files.rs

1// ---------------- [ File: batch-mode-batch-schema/src/load_batch_files.rs ]
2crate::ix!();
3
4/**
5 * ALL-OR-NOTHING loader for the input file.  
6 * 
7 * Reads NDJSON (one `LanguageModelBatchAPIRequest` per line). 
8 * If **any** line is invalid, we immediately fail (do not skip).
9 */
10pub async fn load_input_file(path: impl AsRef<Path>) -> Result<BatchInputData, JsonParseError> {
11    info!("loading input file: {:?}", path.as_ref());
12
13    // If the file doesn’t exist:
14    if !path.as_ref().exists() {
15        if is_test_mode() {
16            warn!(
17                "Mock scenario (test-only): Input file not found at {:?}; returning empty BatchInputData.",
18                path.as_ref()
19            );
20            return Ok(BatchInputData::new(vec![]));
21        } else {
22            error!(
23                "Input file does not exist at {:?}; failing with IoError",
24                path.as_ref()
25            );
26            let io_err = std::io::Error::new(
27                std::io::ErrorKind::NotFound,
28                format!("Input file not found: {}", path.as_ref().display()),
29            );
30            return Err(JsonParseError::IoError(io_err));
31        }
32    }
33
34    let file = File::open(path.as_ref()).await?;
35    let reader = BufReader::new(file);
36    let mut lines = reader.lines();
37
38    let mut requests = Vec::new();
39    while let Some(line) = lines.next_line().await? {
40        let trimmed = line.trim();
41        if trimmed.is_empty() {
42            trace!("Skipping empty line in input file: {}", path.as_ref().display());
43            continue;
44        }
45
46        trace!("Attempting to parse input line: {}", trimmed);
47        // ALL OR NOTHING => if parse fails, return an error immediately.
48        let request = match serde_json::from_str::<LanguageModelBatchAPIRequest>(trimmed) {
49            Ok(req) => req,
50            Err(e) => {
51                error!(
52                    "Invalid line in input file => returning error. line='{}' error='{}'",
53                    trimmed, e
54                );
55                return Err(JsonParseError::SerdeError(e));
56            }
57        };
58        requests.push(request);
59    }
60
61    info!(
62        "Successfully loaded {} request(s) from input file: {:?}",
63        requests.len(),
64        path.as_ref()
65    );
66    Ok(BatchInputData::new(requests))
67}
68
69/**
70 * ALL-OR-NOTHING loader for the error file.  
71 * 
72 * Reads NDJSON (one `BatchResponseRecord` per line). 
73 * If any line is invalid, we return an error. No skipping.
74 */
75pub async fn load_error_file(path: impl AsRef<Path>) -> Result<BatchErrorData, JsonParseError> {
76    info!("loading error file: {:?}", path.as_ref());
77
78    if !path.as_ref().exists() {
79        if is_test_mode() {
80            warn!(
81                "Mock scenario (test-only): Error file not found at {:?}; returning empty BatchErrorData.",
82                path.as_ref()
83            );
84            return Ok(BatchErrorData::new(vec![]));
85        } else {
86            error!(
87                "Error file does not exist at {:?}; failing with IoError",
88                path.as_ref()
89            );
90            let io_err = std::io::Error::new(
91                std::io::ErrorKind::NotFound,
92                format!("Error file not found: {}", path.as_ref().display()),
93            );
94            return Err(JsonParseError::IoError(io_err));
95        }
96    }
97
98    let file = File::open(path.as_ref()).await?;
99    let reader = BufReader::new(file);
100    let mut lines = reader.lines();
101
102    let mut responses = Vec::new();
103    while let Some(line) = lines.next_line().await? {
104        let trimmed = line.trim();
105        if trimmed.is_empty() {
106            trace!("Skipping empty line in error file: {}", path.as_ref().display());
107            continue;
108        }
109
110        trace!("Attempting to parse error-file line: {}", trimmed);
111        // ALL OR NOTHING => if parse fails, return an error
112        let response_record = match serde_json::from_str::<BatchResponseRecord>(trimmed) {
113            Ok(r) => r,
114            Err(e) => {
115                error!(
116                    "Invalid line in error file => returning error. line='{}' error='{}'",
117                    trimmed, e
118                );
119                return Err(JsonParseError::SerdeError(e));
120            }
121        };
122        responses.push(response_record);
123    }
124
125    info!(
126        "Successfully loaded {} record(s) from error file: {:?}",
127        responses.len(),
128        path.as_ref()
129    );
130    Ok(BatchErrorData::new(responses))
131}
132
133/**
134 * ALL-OR-NOTHING loader for the output file.  
135 * 
136 * Reads NDJSON (one `BatchResponseRecord` per line). 
137 * If any line is invalid, we fail immediately. 
138 */
139pub async fn load_output_file(path: impl AsRef<Path>) -> Result<BatchOutputData, JsonParseError> {
140    info!("loading output file: {:?}", path.as_ref());
141
142    if !path.as_ref().exists() {
143        if is_test_mode() {
144            warn!(
145                "Mock scenario (test-only): Output file not found at {:?}; returning empty BatchOutputData.",
146                path.as_ref()
147            );
148            return Ok(BatchOutputData::new(vec![]));
149        } else {
150            error!(
151                "Output file does not exist at {:?}; failing with IoError",
152                path.as_ref()
153            );
154            let io_err = std::io::Error::new(
155                std::io::ErrorKind::NotFound,
156                format!("Output file not found: {}", path.as_ref().display()),
157            );
158            return Err(JsonParseError::IoError(io_err));
159        }
160    }
161
162    let file = File::open(path.as_ref()).await?;
163    let reader = BufReader::new(file);
164    let mut lines = reader.lines();
165
166    let mut responses = Vec::new();
167    while let Some(line) = lines.next_line().await? {
168        let trimmed = line.trim();
169        if trimmed.is_empty() {
170            trace!("Skipping empty line in output file: {}", path.as_ref().display());
171            continue;
172        }
173
174        trace!("Attempting to parse output-file line: {}", trimmed);
175        // ALL OR NOTHING => if parse fails, return an error
176        let response_record = match serde_json::from_str::<BatchResponseRecord>(trimmed) {
177            Ok(r) => r,
178            Err(e) => {
179                error!(
180                    "Invalid line in output file => returning error. line='{}' error='{}'",
181                    trimmed, e
182                );
183                return Err(JsonParseError::SerdeError(e));
184            }
185        };
186        responses.push(response_record);
187    }
188
189    info!(
190        "Successfully loaded {} record(s) from output file: {:?}",
191        responses.len(),
192        path.as_ref()
193    );
194    Ok(BatchOutputData::new(responses))
195}
196
#[cfg(test)]
mod file_loading_tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /** Appends each entry of `lines` to `file`, one per line; panics on write failure. */
    fn write_lines_to_temp_file(file: &mut NamedTempFile, lines: &[&str]) {
        lines
            .iter()
            .for_each(|entry| writeln!(file, "{}", entry).unwrap());
    }

    /** Two valid request lines must yield two parsed requests. */
    #[traced_test]
    async fn should_load_input_file_successfully() {
        info!("Testing load_input_file with valid JSON lines.");

        let mut tmp = NamedTempFile::new().expect("Failed to create temp file for input test.");
        let first = make_valid_lmb_api_request_json_mock("input-1");
        let second = make_valid_lmb_api_request_json_mock("input-2");
        write_lines_to_temp_file(&mut tmp, &[&first, &second]);

        let outcome = load_input_file(tmp.path()).await;

        assert!(outcome.is_ok(), "Should successfully load valid input file.");
        let loaded = outcome.unwrap();
        let count = loaded.requests().len();
        pretty_assert_eq!(count, 2, "Should have exactly two requests.");
        debug!("Loaded {} requests from input file.", count);
    }

    /** A malformed line in the input file must make the whole load fail. */
    #[traced_test]
    async fn should_fail_load_input_file_with_invalid_json() {
        info!("Testing load_input_file with an invalid JSON line.");

        let mut tmp = NamedTempFile::new().expect("Failed to create temp file.");
        write_lines_to_temp_file(&mut tmp, &["{ invalid json }"]);

        let outcome = load_input_file(tmp.path()).await;

        assert!(outcome.is_err(), "Should fail to load invalid JSON line.");
        error!("Received expected error for malformed input JSON: {:?}", outcome.err());
    }

    /** An existing but empty input file loads as zero requests. */
    #[traced_test]
    async fn should_handle_empty_file_for_input_load() {
        info!("Testing load_input_file with an empty file.");

        let tmp = NamedTempFile::new().expect("Failed to create temp file.");

        let outcome = load_input_file(tmp.path()).await;

        assert!(outcome.is_ok(), "Empty file should load successfully, returning 0 requests.");
        let loaded = outcome.unwrap();
        pretty_assert_eq!(loaded.requests().len(), 0, "Should have zero requests from empty file.");
        debug!("Empty file loaded without error, as expected.");
    }

    /** A malformed line in the error file must make the whole load fail. */
    #[traced_test]
    async fn should_fail_load_error_file_with_invalid_json() {
        info!("Testing load_error_file with an invalid JSON line.");

        let mut tmp = NamedTempFile::new().expect("Failed to create temp file.");
        write_lines_to_temp_file(&mut tmp, &["{{ broken stuff }}"]);

        let outcome = load_error_file(tmp.path()).await;

        assert!(outcome.is_err(), "Should fail when encountering invalid JSON in error file.");
        warn!("Got expected parse error: {:?}", outcome.err());
    }

    /** An existing but empty error file loads as zero responses. */
    #[traced_test]
    async fn should_handle_empty_file_for_error_load() {
        info!("Testing load_error_file with an empty file.");

        let tmp = NamedTempFile::new().expect("Failed to create temp file.");

        let outcome = load_error_file(tmp.path()).await;

        assert!(outcome.is_ok(), "Empty error file should load successfully, returning 0 responses.");
        let loaded = outcome.unwrap();
        pretty_assert_eq!(loaded.responses().len(), 0, "No responses expected from empty error file.");
        debug!("Empty error file loaded with no issues.");
    }

    /** A malformed line in the output file must make the whole load fail. */
    #[traced_test]
    async fn should_fail_load_output_file_with_invalid_json() {
        info!("Testing load_output_file with invalid JSON line.");

        let mut tmp = NamedTempFile::new().expect("Failed to create temp file.");
        write_lines_to_temp_file(&mut tmp, &["}{ definitely not valid JSON"]);

        let outcome = load_output_file(tmp.path()).await;

        assert!(outcome.is_err(), "Should fail for malformed JSON lines.");
        error!("Encountered expected error: {:?}", outcome.err());
    }

    /** An existing but empty output file loads as zero responses. */
    #[traced_test]
    async fn should_handle_empty_file_for_output_load() {
        info!("Testing load_output_file with an empty file.");

        let tmp = NamedTempFile::new().expect("Failed to create temp file.");

        let outcome = load_output_file(tmp.path()).await;

        assert!(outcome.is_ok(), "Should succeed loading an empty output file.");
        let loaded = outcome.unwrap();
        pretty_assert_eq!(loaded.responses().len(), 0, "No responses in empty file.");
        trace!("Empty file loaded correctly for output load.");
    }

    /** Two well-formed error records (status 400) must both be loaded. */
    #[traced_test]
    async fn should_load_error_file_successfully() {
        info!("Testing load_error_file with valid NDJSON lines.");

        // First record: status_code=400 with an 'object':'error' body.
        let first = r#"{"id":"batch_req_error-file-1","custom_id":"error-file-1","response":{"status_code":400,"request_id":"resp_req_error-file-1","body":{"error":{"message":"Error for error-file-1","type":"test_error","param":null,"code":null},"object":"error"}},"error":null}"#;

        // Second record: identical shape, different ids.
        let second = r#"{"id":"batch_req_error-file-2","custom_id":"error-file-2","response":{"status_code":400,"request_id":"resp_req_error-file-2","body":{"error":{"message":"Error for error-file-2","type":"test_error","param":null,"code":null},"object":"error"}},"error":null}"#;

        let mut tmp = NamedTempFile::new().expect("Failed to create temp file for error file test.");
        writeln!(tmp, "{}", first).expect("Failed to write line_1");
        writeln!(tmp, "{}", second).expect("Failed to write line_2");

        let outcome = load_error_file(tmp.path()).await;

        assert!(outcome.is_ok(), "Should successfully load valid error file lines.");
        let loaded = outcome.unwrap();
        let count = loaded.responses().len();
        pretty_assert_eq!(count, 2, "Should have exactly two responses for error data.");
        trace!("Loaded {} responses from error file.", count);
    }

    /** Two well-formed success records (status 200) must both be loaded. */
    #[traced_test]
    async fn should_load_output_file_successfully() {
        info!("Testing load_output_file with valid NDJSON lines.");

        // First record: status_code=200 with an 'object':'chat.completion' body.
        let first = r#"{"id":"batch_req_output-file-1","custom_id":"output-file-1","response":{"status_code":200,"request_id":"resp_req_output-file-1","body":{"id":"success-id","object":"chat.completion","created":0,"model":"test-model","choices":[],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}},"error":null}"#;

        // Second record: another success with non-zero usage counters.
        let second = r#"{"id":"batch_req_output-file-2","custom_id":"output-file-2","response":{"status_code":200,"request_id":"resp_req_output-file-2","body":{"id":"success-id-2","object":"chat.completion","created":0,"model":"test-model-2","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":20,"total_tokens":30}}},"error":null}"#;

        let mut tmp = NamedTempFile::new().expect("Failed to create temp file for output file test.");
        writeln!(tmp, "{}", first).expect("Failed to write line_1");
        writeln!(tmp, "{}", second).expect("Failed to write line_2");

        let outcome = load_output_file(tmp.path()).await;

        assert!(outcome.is_ok(), "Should load valid output file lines successfully.");
        let loaded = outcome.unwrap();
        let count = loaded.responses().len();
        pretty_assert_eq!(count, 2, "Should have exactly 2 response records from output file.");
        debug!("Loaded {} records from output file.", count);
    }
}