use std::{
fs::File,
io::{BufRead, BufReader},
path::PathBuf,
};
use anyhow::anyhow;
use crate::script_types::{
BootstrapFinishRecord, BootstrapHeaderRecord, BootstrapScriptRecord,
SequencedBootstrapScriptRecord,
};
/// Reads bootstrap script records sequentially from an ordered list of JSONL
/// files, presenting them as one continuous stream of sequenced records.
///
/// Construction consumes the leading Header record; the remaining records
/// are obtained through the `Iterator` implementation.
#[derive(Debug)]
pub struct BootstrapScriptReader {
/// Script files, read in the order given.
files: Vec<PathBuf>,
/// Index into `files` of the next file to open.
next_file_index: usize,
/// Buffered reader over the file currently being read; `None` when no
/// file is open (before the first read, or between files).
current_reader: Option<BufReader<File>>,
/// Header record captured while constructing the reader.
header: BootstrapHeaderRecord,
/// The Finish record, once it has been read from the script or
/// auto-generated after the last file; returned by every later read.
footer: Option<SequencedBootstrapScriptRecord>,
/// Sequence number assigned to the next record; skipped Comment records
/// also advance it.
seq: u64,
/// Set once the iterator has yielded the Finish record; the iterator
/// returns `None` from then on.
finish_returned: bool,
}
impl BootstrapScriptReader {
pub fn new(files: Vec<PathBuf>) -> anyhow::Result<Self> {
for file in &files {
if file.extension().map(|ext| ext != "jsonl").unwrap_or(true) {
return Err(anyhow!(
"Invalid script file; only JSONL files supported: {}",
file.to_string_lossy()
));
}
}
let mut reader = BootstrapScriptReader {
files,
next_file_index: 0,
current_reader: None,
header: BootstrapHeaderRecord::default(),
footer: None,
seq: 0,
finish_returned: false,
};
let read_result = reader.get_next_record();
if let Ok(seq_rec) = read_result {
if let BootstrapScriptRecord::Header(header) = seq_rec.record {
reader.header = header;
Ok(reader)
} else {
Err(anyhow!(
"Script is missing Header record: {}",
reader.get_current_file_name()
))
}
} else {
Err(anyhow!(
"Script is missing Header record: {}",
reader.get_current_file_name()
))
}
}
pub fn get_header(&self) -> BootstrapHeaderRecord {
self.header.clone()
}
fn get_next_record(&mut self) -> anyhow::Result<SequencedBootstrapScriptRecord> {
if let Some(ref footer) = self.footer {
return Ok(footer.clone());
}
if self.current_reader.is_none() {
self.open_next_file()?;
}
if let Some(reader) = &mut self.current_reader {
let mut line = String::new();
match reader.read_line(&mut line) {
Ok(0) => {
self.current_reader = None;
self.get_next_record()
}
Ok(_) => {
let record: BootstrapScriptRecord = match serde_json::from_str(&line) {
Ok(r) => r,
Err(e) => {
return Err(anyhow!(
"Bad record format in file {}: Error - {}; Record - {}",
self.get_current_file_name(),
e,
line
));
}
};
let seq_rec = match &record {
BootstrapScriptRecord::Comment(_) => {
self.seq += 1;
return self.get_next_record();
}
BootstrapScriptRecord::Header(_) => {
let seq_rec = SequencedBootstrapScriptRecord {
record: record.clone(),
seq: self.seq,
};
if seq_rec.seq > 0 {
log::warn!(
"Header record found not at start of the script: {seq_rec:?}"
);
}
seq_rec
}
_ => SequencedBootstrapScriptRecord {
record: record.clone(),
seq: self.seq,
},
};
self.seq += 1;
if let BootstrapScriptRecord::Finish(_) = seq_rec.record {
self.footer = Some(seq_rec.clone());
}
Ok(seq_rec)
}
Err(e) => Err(anyhow!("Error reading file: {e}")),
}
} else {
let footer = SequencedBootstrapScriptRecord {
record: BootstrapScriptRecord::Finish(BootstrapFinishRecord {
description: "Auto generated at end of script.".to_string(),
}),
seq: self.seq,
};
self.footer = Some(footer.clone());
Ok(footer)
}
}
fn get_current_file_name(&self) -> String {
if self.current_reader.is_some() {
let path = self.files[self.next_file_index - 1].clone();
path.to_string_lossy().into_owned()
} else {
"None".to_string()
}
}
fn open_next_file(&mut self) -> anyhow::Result<()> {
if self.next_file_index < self.files.len() {
let file_path = &self.files[self.next_file_index];
let file = File::open(file_path).map_err(|e| {
anyhow!(
"Can't open script file: {} - {}",
file_path.to_string_lossy(),
e
)
})?;
self.current_reader = Some(BufReader::new(file));
self.next_file_index += 1;
} else {
self.current_reader = None;
}
Ok(())
}
}
/// Iterates over the records that follow the Header, ending with exactly one
/// Finish record (either read from the script or auto-generated).
impl Iterator for BootstrapScriptReader {
    type Item = anyhow::Result<SequencedBootstrapScriptRecord>;

    /// Yields the next record or read error; returns `None` once the Finish
    /// record has been handed out.
    fn next(&mut self) -> Option<Self::Item> {
        if self.finish_returned {
            return None;
        }

        // A Finish record that was cached earlier but not yet yielded is
        // handed out now and ends the iteration.
        if let Some(cached_finish) = self.footer.clone() {
            self.finish_returned = true;
            return Some(Ok(cached_finish));
        }

        let outcome = self.get_next_record();
        // A successful read that populated the footer means the record being
        // returned is the Finish record itself.
        if outcome.is_ok() && self.footer.is_some() {
            self.finish_returned = true;
        }
        Some(outcome)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Owns a temporary file and removes it when dropped, so the file is
    /// cleaned up even when an assertion in the test panics.
    ///
    /// Declare the guard before the reader that uses the file: locals drop in
    /// reverse declaration order, so the reader is closed before the file is
    /// removed.
    struct TempFileGuard(std::path::PathBuf);

    impl TempFileGuard {
        /// Returns an owned copy of the guarded path.
        fn path(&self) -> std::path::PathBuf {
            self.0.clone()
        }
    }

    impl Drop for TempFileGuard {
        fn drop(&mut self) {
            cleanup_temp_file(&self.0);
        }
    }

    /// Writes `content` to a uniquely named `.jsonl` file in the system temp
    /// directory and returns its path.
    fn create_temp_jsonl_file(content: &str) -> std::path::PathBuf {
        let temp_dir = std::env::temp_dir();
        let file_name = format!("test_{}.jsonl", uuid::Uuid::new_v4());
        let file_path = temp_dir.join(file_name);
        std::fs::write(&file_path, content).unwrap();
        file_path
    }

    /// Creates a temporary `.jsonl` script that is deleted when the returned
    /// guard goes out of scope.
    fn temp_script(content: &str) -> TempFileGuard {
        TempFileGuard(create_temp_jsonl_file(content))
    }

    /// Best-effort removal of a temporary file; failures are ignored.
    fn cleanup_temp_file(path: &Path) {
        let _ = std::fs::remove_file(path);
    }

    #[test]
    fn test_invalid_file_extension() {
        // Unique name so concurrent test runs sharing the temp directory
        // cannot collide.
        let file_path =
            std::env::temp_dir().join(format!("test_{}.txt", uuid::Uuid::new_v4()));
        std::fs::write(&file_path, "test").unwrap();
        let script = TempFileGuard(file_path);

        let result = BootstrapScriptReader::new(vec![script.path()]);

        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("only JSONL files supported"));
    }

    #[test]
    fn test_missing_header() {
        let content = r#"{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}"#;
        let script = temp_script(content);

        let result = BootstrapScriptReader::new(vec![script.path()]);

        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("missing Header record"));
    }

    #[test]
    fn test_valid_script_with_header() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}
{"kind":"Finish","description":"Done"}"#;
        let script = temp_script(content);

        let reader = BootstrapScriptReader::new(vec![script.path()]);
        assert!(reader.is_ok());
        let reader = reader.unwrap();

        assert_eq!(reader.get_header().description, "Test");
    }

    #[test]
    fn test_comment_filtering() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Comment","comment":"This should be filtered"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}"#;
        let script = temp_script(content);
        let mut reader = BootstrapScriptReader::new(vec![script.path()]).unwrap();

        let record = reader.next().unwrap().unwrap();

        match record.record {
            BootstrapScriptRecord::Node(n) => assert_eq!(n.id, "n1"),
            _ => panic!("Expected Node record, got {:?}", record.record),
        }
    }

    #[test]
    fn test_auto_generated_finish() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}"#;
        let script = temp_script(content);
        let mut reader = BootstrapScriptReader::new(vec![script.path()]).unwrap();

        let rec1 = reader.next().unwrap().unwrap();
        match rec1.record {
            BootstrapScriptRecord::Node(_) => {}
            _ => panic!("Expected Node"),
        }

        let rec2 = reader.next().unwrap().unwrap();
        match rec2.record {
            BootstrapScriptRecord::Finish(f) => {
                assert!(f.description.contains("Auto generated"));
            }
            _ => panic!("Expected Finish"),
        }

        assert!(reader.next().is_none());
    }

    #[test]
    fn test_sequence_numbering() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}
{"kind":"Node","id":"n2","labels":["Test"],"properties":{}}"#;
        let script = temp_script(content);
        let mut reader = BootstrapScriptReader::new(vec![script.path()]).unwrap();

        // The Header consumed sequence number 0 during construction.
        let rec1 = reader.next().unwrap().unwrap();
        assert_eq!(rec1.seq, 1);
        let rec2 = reader.next().unwrap().unwrap();
        assert_eq!(rec2.seq, 2);
    }

    #[test]
    fn test_malformed_json() {
        let content = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
not valid json"#;
        let script = temp_script(content);
        let mut reader = BootstrapScriptReader::new(vec![script.path()]).unwrap();

        let result = reader.next();

        assert!(result.is_some());
        assert!(result.unwrap().is_err());
    }

    #[test]
    fn test_multi_file_reading() {
        let content1 = r#"{"kind":"Header","start_time":"2024-01-01T00:00:00+00:00","description":"Test"}
{"kind":"Node","id":"n1","labels":["Test"],"properties":{}}"#;
        let content2 = r#"{"kind":"Node","id":"n2","labels":["Test"],"properties":{}}
{"kind":"Finish","description":"Done"}"#;
        let script1 = temp_script(content1);
        let script2 = temp_script(content2);
        let mut reader =
            BootstrapScriptReader::new(vec![script1.path(), script2.path()]).unwrap();

        let rec1 = reader.next().unwrap().unwrap();
        match rec1.record {
            BootstrapScriptRecord::Node(n) => assert_eq!(n.id, "n1"),
            _ => panic!("Expected Node n1"),
        }

        let rec2 = reader.next().unwrap().unwrap();
        match rec2.record {
            BootstrapScriptRecord::Node(n) => assert_eq!(n.id, "n2"),
            _ => panic!("Expected Node n2"),
        }

        let rec3 = reader.next().unwrap().unwrap();
        match rec3.record {
            BootstrapScriptRecord::Finish(_) => {}
            _ => panic!("Expected Finish"),
        }

        assert!(reader.next().is_none());
    }
}