Skip to main content

drasi_bootstrap_scriptfile/
script_reader.rs

1// Copyright 2025 The Drasi Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Bootstrap script reader for processing JSONL script files
16//!
17//! This module provides functionality to read and parse bootstrap script files in JSONL format.
18//! It supports multi-file reading, automatic sequencing, header validation, comment filtering,
19//! and finish record handling.
20
21use std::{
22    fs::File,
23    io::{BufRead, BufReader},
24    path::PathBuf,
25};
26
27use anyhow::anyhow;
28
29use crate::script_types::{
30    BootstrapFinishRecord, BootstrapHeaderRecord, BootstrapScriptRecord,
31    SequencedBootstrapScriptRecord,
32};
33
/// Reader for bootstrap script files
///
/// Reads JSONL files sequentially, validates header presence, filters comments,
/// and handles finish records. Implements Iterator for record iteration.
///
/// The header is consumed while the reader is constructed, so iteration starts at
/// the first record after it. Iteration yields exactly one Finish record (read from
/// the script or synthesized once all files are exhausted) and then stops.
#[derive(Debug)]
pub struct BootstrapScriptReader {
    /// List of script files to read, in the order they are consumed
    files: Vec<PathBuf>,
    /// Index into `files` of the next file to open; while a file is open, the file
    /// being read is the one at `next_file_index - 1`
    next_file_index: usize,
    /// Reader over the file currently being consumed; `None` before the first file
    /// is opened and after a file has been read to its end
    current_reader: Option<BufReader<File>>,
    /// Header record from the script; holds the default value until the first
    /// record has been read and validated during construction
    header: BootstrapHeaderRecord,
    /// Footer/finish record (cached once encountered); once set, every further read
    /// returns a clone of it
    footer: Option<SequencedBootstrapScriptRecord>,
    /// Sequence number assigned to the next record read; comment records consume a
    /// number even though they are never returned
    seq: u64,
    /// Whether the finish record has been returned by the iterator
    finish_returned: bool,
}
55
56impl BootstrapScriptReader {
57    /// Create a new bootstrap script reader
58    ///
59    /// # Arguments
60    /// * `files` - List of JSONL file paths to read in order
61    ///
62    /// # Returns
63    /// Result containing the reader or an error if validation fails or header is missing
64    ///
65    /// # Errors
66    /// - Returns error if any file doesn't have .jsonl extension
67    /// - Returns error if first record is not a Header
68    pub fn new(files: Vec<PathBuf>) -> anyhow::Result<Self> {
69        // Only supports JSONL files. Return error if any of the files are not JSONL files.
70        for file in &files {
71            if file.extension().map(|ext| ext != "jsonl").unwrap_or(true) {
72                return Err(anyhow!(
73                    "Invalid script file; only JSONL files supported: {}",
74                    file.to_string_lossy()
75                ));
76            }
77        }
78
79        let mut reader = BootstrapScriptReader {
80            files,
81            next_file_index: 0,
82            current_reader: None,
83            header: BootstrapHeaderRecord::default(),
84            footer: None,
85            seq: 0,
86            finish_returned: false,
87        };
88
89        // Read and validate first record is a Header
90        let read_result = reader.get_next_record();
91
92        if let Ok(seq_rec) = read_result {
93            if let BootstrapScriptRecord::Header(header) = seq_rec.record {
94                reader.header = header;
95                Ok(reader)
96            } else {
97                Err(anyhow!(
98                    "Script is missing Header record: {}",
99                    reader.get_current_file_name()
100                ))
101            }
102        } else {
103            Err(anyhow!(
104                "Script is missing Header record: {}",
105                reader.get_current_file_name()
106            ))
107        }
108    }
109
110    /// Get the header record from the script
111    pub fn get_header(&self) -> BootstrapHeaderRecord {
112        self.header.clone()
113    }
114
115    /// Get the next record from the script
116    ///
117    /// Reads records sequentially from all files, filtering out comments.
118    /// Once a Finish record is encountered, it is cached and always returned.
119    /// If no Finish record exists, one is auto-generated at end of files.
120    fn get_next_record(&mut self) -> anyhow::Result<SequencedBootstrapScriptRecord> {
121        // Once we have reached the end of the script, always return the Finish record.
122        if let Some(ref footer) = self.footer {
123            return Ok(footer.clone());
124        }
125
126        if self.current_reader.is_none() {
127            self.open_next_file()?;
128        }
129
130        if let Some(reader) = &mut self.current_reader {
131            let mut line = String::new();
132            match reader.read_line(&mut line) {
133                Ok(0) => {
134                    // End of current file, try next file
135                    self.current_reader = None;
136                    self.get_next_record()
137                }
138                Ok(_) => {
139                    let record: BootstrapScriptRecord = match serde_json::from_str(&line) {
140                        Ok(r) => r,
141                        Err(e) => {
142                            return Err(anyhow!(
143                                "Bad record format in file {}: Error - {}; Record - {}",
144                                self.get_current_file_name(),
145                                e,
146                                line
147                            ));
148                        }
149                    };
150
151                    let seq_rec = match &record {
152                        BootstrapScriptRecord::Comment(_) => {
153                            // The BootstrapScriptReader should never return a Comment record.
154                            // Return the next record, but need to increment sequence counter
155                            self.seq += 1;
156                            return self.get_next_record();
157                        }
158                        BootstrapScriptRecord::Header(_) => {
159                            let seq_rec = SequencedBootstrapScriptRecord {
160                                record: record.clone(),
161                                seq: self.seq,
162                            };
163
164                            // Warn if there is a Header record in the middle of the script.
165                            if seq_rec.seq > 0 {
166                                log::warn!(
167                                    "Header record found not at start of the script: {seq_rec:?}"
168                                );
169                            }
170
171                            seq_rec
172                        }
173                        _ => SequencedBootstrapScriptRecord {
174                            record: record.clone(),
175                            seq: self.seq,
176                        },
177                    };
178                    self.seq += 1;
179
180                    // If the record is a Finish record, set the footer and return it so it is always returned in the future.
181                    if let BootstrapScriptRecord::Finish(_) = seq_rec.record {
182                        self.footer = Some(seq_rec.clone());
183                    }
184                    Ok(seq_rec)
185                }
186                Err(e) => Err(anyhow!("Error reading file: {e}")),
187            }
188        } else {
189            // Generate a synthetic Finish record to mark the end of the script.
190            let footer = SequencedBootstrapScriptRecord {
191                record: BootstrapScriptRecord::Finish(BootstrapFinishRecord {
192                    description: "Auto generated at end of script.".to_string(),
193                }),
194                seq: self.seq,
195            };
196            self.footer = Some(footer.clone());
197            Ok(footer)
198        }
199    }
200
201    /// Get the current file name (for error reporting)
202    fn get_current_file_name(&self) -> String {
203        if self.current_reader.is_some() {
204            let path = self.files[self.next_file_index - 1].clone();
205            path.to_string_lossy().into_owned()
206        } else {
207            "None".to_string()
208        }
209    }
210
211    /// Open the next file in the sequence
212    fn open_next_file(&mut self) -> anyhow::Result<()> {
213        if self.next_file_index < self.files.len() {
214            let file_path = &self.files[self.next_file_index];
215            let file = File::open(file_path).map_err(|e| {
216                anyhow!(
217                    "Can't open script file: {} - {}",
218                    file_path.to_string_lossy(),
219                    e
220                )
221            })?;
222            self.current_reader = Some(BufReader::new(file));
223            self.next_file_index += 1;
224        } else {
225            self.current_reader = None;
226        }
227        Ok(())
228    }
229}
230
231impl Iterator for BootstrapScriptReader {
232    type Item = anyhow::Result<SequencedBootstrapScriptRecord>;
233
234    fn next(&mut self) -> Option<Self::Item> {
235        // If we've already returned the finish record, we're done
236        if self.finish_returned {
237            return None;
238        }
239
240        // If footer is set but not yet returned, return it now
241        if let Some(footer) = &self.footer {
242            self.finish_returned = true;
243            return Some(Ok(footer.clone()));
244        }
245
246        // Get the next record
247        match self.get_next_record() {
248            Ok(record) => {
249                // Check if a finish record was just cached
250                if self.footer.is_some() {
251                    // The record we got back IS the finish record
252                    self.finish_returned = true;
253                }
254                Some(Ok(record))
255            }
256            Err(e) => Some(Err(e)),
257        }
258    }
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Temporary script file that is deleted when the guard goes out of scope.
    ///
    /// Cleanup happens in `Drop`, so the file is removed even when an assertion
    /// fails and the test unwinds.
    struct TempScript {
        path: std::path::PathBuf,
    }

    impl TempScript {
        /// Write `content` to a uniquely named file with the given extension in the
        /// system temp directory.
        fn with_extension(extension: &str, content: &str) -> Self {
            let file_name = format!("test_{}.{}", uuid::Uuid::new_v4(), extension);
            let path = std::env::temp_dir().join(file_name);

            std::fs::write(&path, content).unwrap();
            TempScript { path }
        }

        /// Write `content` to a uniquely named `.jsonl` file.
        fn jsonl(content: &str) -> Self {
            Self::with_extension("jsonl", content)
        }

        /// Owned copy of the file path, ready to hand to the reader.
        fn path(&self) -> std::path::PathBuf {
            self.path.clone()
        }
    }

    impl Drop for TempScript {
        fn drop(&mut self) {
            // Best effort: a failed removal must not mask the test outcome.
            let _ = std::fs::remove_file(&self.path);
        }
    }

    /// Files without a `.jsonl` extension are rejected up front.
    #[test]
    fn test_invalid_file_extension() {
        let script = TempScript::with_extension("txt", "test");

        let result = BootstrapScriptReader::new(vec![script.path()]);

        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("only JSONL files supported"));
    }

    /// A script whose first record is not a Header cannot be opened.
    #[test]
    fn test_missing_header() {
        let content = r#"{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}"#;
        let script = TempScript::jsonl(content);

        let result = BootstrapScriptReader::new(vec![script.path()]);

        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("missing Header record"));
    }

    /// The Header is parsed during construction and exposed through `get_header`.
    #[test]
    fn test_valid_script_with_header() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}
{"kind":"Finish","description":"Done"}"#;
        let script = TempScript::jsonl(content);

        let reader = BootstrapScriptReader::new(vec![script.path()]);
        assert!(reader.is_ok());

        let reader = reader.unwrap();
        assert_eq!(reader.get_header().description, "Test");
    }

    /// Comment records are never yielded by the iterator.
    #[test]
    fn test_comment_filtering() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Comment","comment":"This should be filtered"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}"#;
        let script = TempScript::jsonl(content);

        let mut reader = BootstrapScriptReader::new(vec![script.path()]).unwrap();

        // First record after header should be Node (comment filtered)
        let record = reader.next().unwrap().unwrap();
        match record.record {
            BootstrapScriptRecord::Node(n) => assert_eq!(n.id, "n1"),
            _ => panic!("Expected Node record, got {:?}", record.record),
        }
    }

    /// A script without a Finish record gets one generated at the end.
    #[test]
    fn test_auto_generated_finish() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}"#;
        let script = TempScript::jsonl(content);

        let mut reader = BootstrapScriptReader::new(vec![script.path()]).unwrap();

        // Read the node
        let rec1 = reader.next().unwrap().unwrap();
        match rec1.record {
            BootstrapScriptRecord::Node(_) => {}
            _ => panic!("Expected Node"),
        }

        // Auto-generated Finish record is returned
        let rec2 = reader.next().unwrap().unwrap();
        match rec2.record {
            BootstrapScriptRecord::Finish(f) => {
                assert!(f.description.contains("Auto generated"));
            }
            _ => panic!("Expected Finish"),
        }

        // After Finish, iteration stops
        assert!(reader.next().is_none());
    }

    /// Sequence numbers increase by one per record, starting with the Header at 0.
    #[test]
    fn test_sequence_numbering() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}
{"kind":"Node","id":"n2","labels":["Test"],"properties":{}}"#;
        let script = TempScript::jsonl(content);

        let mut reader = BootstrapScriptReader::new(vec![script.path()]).unwrap();

        let rec1 = reader.next().unwrap().unwrap();
        assert_eq!(rec1.seq, 1); // Header is seq 0

        let rec2 = reader.next().unwrap().unwrap();
        assert_eq!(rec2.seq, 2);
    }

    /// A line that is not valid JSON is surfaced as an error item.
    #[test]
    fn test_malformed_json() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
not valid json"#;
        let script = TempScript::jsonl(content);

        let mut reader = BootstrapScriptReader::new(vec![script.path()]).unwrap();

        let result = reader.next();
        assert!(result.is_some());
        assert!(result.unwrap().is_err());
    }

    /// Records are read across file boundaries in the order the files are listed.
    #[test]
    fn test_multi_file_reading() {
        let content1 = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}"#;
        let content2 = r#"{"kind":"Node","id":"n2","labels":["Test"],"properties":{}}
{"kind":"Finish","description":"Done"}"#;

        let script1 = TempScript::jsonl(content1);
        let script2 = TempScript::jsonl(content2);

        let mut reader =
            BootstrapScriptReader::new(vec![script1.path(), script2.path()]).unwrap();

        // Read both nodes
        let rec1 = reader.next().unwrap().unwrap();
        match rec1.record {
            BootstrapScriptRecord::Node(n) => assert_eq!(n.id, "n1"),
            _ => panic!("Expected Node n1"),
        }

        let rec2 = reader.next().unwrap().unwrap();
        match rec2.record {
            BootstrapScriptRecord::Node(n) => assert_eq!(n.id, "n2"),
            _ => panic!("Expected Node n2"),
        }

        // Finish record from file2 is returned
        let rec3 = reader.next().unwrap().unwrap();
        match rec3.record {
            BootstrapScriptRecord::Finish(_) => {}
            _ => panic!("Expected Finish"),
        }

        // Iterator stops after returning Finish
        assert!(reader.next().is_none());
    }
}