use batless::{BatlessConfig, StreamingCheckpoint};
use std::io::Write;
use std::path::Path;
use std::process::Command;
use tempfile::{NamedTempFile, TempDir};
#[test]
fn test_streaming_checkpoint_creation() {
let config = BatlessConfig::default();
let checkpoint = StreamingCheckpoint::new("test_file.txt".to_string(), 10, 250, 1, &config);
assert_eq!(checkpoint.line_number, 10);
assert_eq!(checkpoint.bytes_processed, 250);
assert_eq!(checkpoint.chunk_number, 1);
assert_eq!(checkpoint.file_path, "test_file.txt");
assert!(!checkpoint.timestamp.is_empty(), "Timestamp should be set");
assert!(
!checkpoint.config_hash.is_empty(),
"Config hash should be set"
);
assert_eq!(checkpoint.schema_version, config.schema_version);
}
#[test]
fn test_streaming_checkpoint_compatibility() {
let config = BatlessConfig::default()
.with_streaming_json(true)
.with_streaming_chunk_size(10);
let checkpoint = StreamingCheckpoint::new("test_file.txt".to_string(), 15, 500, 2, &config);
assert!(
checkpoint.is_compatible(&config),
"Checkpoint should be compatible with same config"
);
let different_config = config.clone().with_max_lines(5000);
assert!(
!checkpoint.is_compatible(&different_config),
"Checkpoint should be incompatible with different max_lines"
);
let different_config2 = config.clone().with_include_tokens(true);
assert!(
!checkpoint.is_compatible(&different_config2),
"Checkpoint should be incompatible with different token setting"
);
let different_config3 = config.with_language(Some("Python".to_string()));
assert!(
!checkpoint.is_compatible(&different_config3),
"Checkpoint should be incompatible with different language"
);
}
#[test]
fn test_streaming_checkpoint_save_load() {
let temp_dir = TempDir::new().unwrap();
let checkpoint_path = temp_dir.path().join("checkpoint.json");
let config = BatlessConfig::default();
let original_checkpoint =
StreamingCheckpoint::new("test_file.txt".to_string(), 25, 1000, 3, &config);
batless::StreamingProcessor::save_checkpoint(&original_checkpoint, &checkpoint_path)
.expect("Should save checkpoint successfully");
assert!(
checkpoint_path.exists(),
"Checkpoint file should be created"
);
let loaded_checkpoint = batless::StreamingProcessor::load_checkpoint(&checkpoint_path)
.expect("Should load checkpoint successfully");
assert_eq!(loaded_checkpoint.file_path, original_checkpoint.file_path);
assert_eq!(
loaded_checkpoint.line_number,
original_checkpoint.line_number
);
assert_eq!(
loaded_checkpoint.bytes_processed,
original_checkpoint.bytes_processed
);
assert_eq!(
loaded_checkpoint.chunk_number,
original_checkpoint.chunk_number
);
assert_eq!(
loaded_checkpoint.config_hash,
original_checkpoint.config_hash
);
assert_eq!(
loaded_checkpoint.schema_version,
original_checkpoint.schema_version
);
assert_eq!(loaded_checkpoint.timestamp, original_checkpoint.timestamp);
}
#[test]
fn test_streaming_checkpoint_load_nonexistent() {
let nonexistent_path = Path::new("/nonexistent/checkpoint.json");
let result = batless::StreamingProcessor::load_checkpoint(nonexistent_path);
assert!(
result.is_err(),
"Loading nonexistent checkpoint should fail"
);
}
#[test]
fn test_streaming_checkpoint_load_invalid_json() {
let temp_dir = TempDir::new().unwrap();
let checkpoint_path = temp_dir.path().join("invalid.json");
std::fs::write(&checkpoint_path, "invalid json content").expect("Should write invalid JSON");
let result = batless::StreamingProcessor::load_checkpoint(&checkpoint_path);
assert!(result.is_err(), "Loading invalid JSON should fail");
}
#[test]
fn test_streaming_processor_with_stdin() {
let config = BatlessConfig::default().with_streaming_json(true);
let checkpoint = StreamingCheckpoint::new("-".to_string(), 0, 0, 0, &config);
let result = batless::StreamingProcessor::process_streaming("-", &config, Some(checkpoint));
assert!(result.is_err(), "Stdin with checkpoint should fail");
if let Err(error) = result {
assert!(
error.to_string().contains("Resume/checkpoint") || error.to_string().contains("stdin"),
"Error should mention checkpoint/stdin incompatibility: {}",
error
);
}
}
#[test]
fn test_streaming_processor_checkpoint_file_mismatch() {
let config = BatlessConfig::default().with_streaming_json(true);
let checkpoint =
StreamingCheckpoint::new("different_file.txt".to_string(), 10, 500, 1, &config);
let result = batless::StreamingProcessor::process_streaming(
"current_file.txt",
&config,
Some(checkpoint),
);
assert!(result.is_err(), "Mismatched file path should fail");
if let Err(error) = result {
assert!(
error.to_string().contains("file path") || error.to_string().contains("doesn't match"),
"Error should mention file path mismatch: {}",
error
);
}
}
#[test]
fn test_streaming_processor_incompatible_checkpoint() {
let config = BatlessConfig::default()
.with_streaming_json(true)
.with_max_lines(1000);
let different_config = config.clone().with_max_lines(2000);
let checkpoint =
StreamingCheckpoint::new("test_file.txt".to_string(), 10, 500, 1, &different_config);
let result =
batless::StreamingProcessor::process_streaming("test_file.txt", &config, Some(checkpoint));
assert!(result.is_err(), "Incompatible checkpoint should fail");
if let Err(error) = result {
assert!(
error.to_string().contains("incompatible")
|| error.to_string().contains("configuration"),
"Error should mention incompatibility: {}",
error
);
}
}
#[test]
fn test_streaming_get_schema() {
let schema = batless::StreamingProcessor::get_streaming_schema();
assert!(schema.is_object(), "Schema should be a JSON object");
assert!(
schema["$schema"].is_string(),
"Schema should have $schema field"
);
assert!(
schema["title"].is_string(),
"Schema should have title field"
);
assert!(
schema["description"].is_string(),
"Schema should have description field"
);
assert_eq!(schema["type"], "object", "Schema type should be object");
}
#[test]
fn test_streaming_checkpoint_config_hash_changes() {
let config1 = BatlessConfig::default().with_max_lines(1000);
let config2 = BatlessConfig::default().with_max_lines(2000);
let checkpoint1 = StreamingCheckpoint::new("test.txt".to_string(), 0, 0, 0, &config1);
let checkpoint2 = StreamingCheckpoint::new("test.txt".to_string(), 0, 0, 0, &config2);
assert_ne!(
checkpoint1.config_hash, checkpoint2.config_hash,
"Different configs should produce different hashes"
);
}
#[test]
fn test_streaming_checkpoint_serialization() {
let config = BatlessConfig::default();
let checkpoint = StreamingCheckpoint::new("test_file.txt".to_string(), 42, 1024, 5, &config);
let json_str = serde_json::to_string(&checkpoint).expect("Should serialize to JSON");
assert!(
json_str.contains("test_file.txt"),
"JSON should contain file path"
);
assert!(json_str.contains("42"), "JSON should contain line number");
assert!(
json_str.contains("1024"),
"JSON should contain bytes processed"
);
assert!(json_str.contains("5"), "JSON should contain chunk number");
let deserialized: StreamingCheckpoint =
serde_json::from_str(&json_str).expect("Should deserialize from JSON");
assert_eq!(deserialized.file_path, checkpoint.file_path);
assert_eq!(deserialized.line_number, checkpoint.line_number);
assert_eq!(deserialized.bytes_processed, checkpoint.bytes_processed);
assert_eq!(deserialized.chunk_number, checkpoint.chunk_number);
}
fn run_batless_cli(args: &[&str]) -> std::process::Output {
Command::new(env!("CARGO_BIN_EXE_batless"))
.args(args)
.output()
.expect("Failed to execute batless")
}
fn create_streaming_test_file(content: &str, extension: &str) -> NamedTempFile {
let mut file = tempfile::Builder::new()
.suffix(extension)
.tempfile()
.unwrap();
file.write_all(content.as_bytes()).unwrap();
file
}
#[test]
fn test_chunk_strategy_line_default() {
let content = "pub fn alpha() -> i32 {\n let x = 1;\n let y = 2;\n let z = 3;\n let w = 4;\n let v = 5;\n x + y + z + w + v\n}\n\npub fn beta() -> i32 {\n let a = 10;\n let b = 20;\n let c = 30;\n let d = 40;\n let e = 50;\n a + b + c + d + e\n}\n\npub fn gamma() -> i32 {\n let m = 100;\n let n = 200;\n let o = 300;\n let p = 400;\n let q = 500;\n m + n + o + p + q\n}\n\npub fn delta() {\n // intentionally empty\n}\n";
let file = create_streaming_test_file(content, ".rs");
let path = file.path().to_str().unwrap();
let output = run_batless_cli(&[
"--streaming-json",
"--mode=json",
"--streaming-chunk-size=10",
path,
]);
assert!(
output.status.success(),
"batless --streaming-json --streaming-chunk-size=10 should succeed"
);
let stdout = String::from_utf8(output.stdout).unwrap();
assert!(!stdout.is_empty(), "Output should not be empty");
let chunks: Vec<serde_json::Value> = stdout
.lines()
.filter(|l| !l.trim().is_empty())
.map(|l| serde_json::from_str(l).expect("Each NDJSON line should be valid JSON"))
.collect();
assert!(
chunks.len() > 1,
"File with >30 lines and chunk-size=10 should produce multiple chunks, got {}",
chunks.len()
);
let total_lines_in_chunks: usize = chunks
.iter()
.map(|c| c["lines"].as_array().map(|a| a.len()).unwrap_or(0))
.sum();
let file_total_lines = content.lines().count();
assert_eq!(
total_lines_in_chunks, file_total_lines,
"All file lines should appear across chunks: expected {}, got {}",
file_total_lines, total_lines_in_chunks
);
}
#[test]
fn test_chunk_strategy_semantic() {
let content = "pub fn alpha() -> i32 {\n let x = 1;\n let y = 2;\n let z = 3;\n x + y + z\n}\n\npub fn beta() -> i32 {\n let a = 10;\n let b = 20;\n a + b\n}\n\npub fn gamma() -> i32 {\n let m = 100;\n let n = 200;\n m + n\n}\n\npub fn delta() {\n // empty\n}\n\npub fn epsilon() -> bool {\n true\n}\n";
let file = create_streaming_test_file(content, ".rs");
let path = file.path().to_str().unwrap();
let output = run_batless_cli(&[
"--streaming-json",
"--mode=json",
"--streaming-chunk-size=5",
"--chunk-strategy=semantic",
path,
]);
assert!(
output.status.success(),
"batless --chunk-strategy=semantic should succeed"
);
let stdout = String::from_utf8(output.stdout).unwrap();
assert!(!stdout.is_empty(), "Output should not be empty");
let chunks: Vec<serde_json::Value> = stdout
.lines()
.filter(|l| !l.trim().is_empty())
.map(|l| serde_json::from_str(l).expect("Each NDJSON line should be valid JSON"))
.collect();
assert!(!chunks.is_empty(), "Should produce at least one chunk");
let any_extended = chunks
.iter()
.any(|c| c["lines"].as_array().map(|a| a.len() > 5).unwrap_or(false));
assert!(
any_extended,
"At least one semantic chunk should extend beyond the nominal chunk size of 5"
);
let total_lines_in_chunks: usize = chunks
.iter()
.map(|c| c["lines"].as_array().map(|a| a.len()).unwrap_or(0))
.sum();
let file_total_lines = content.lines().count();
assert_eq!(
total_lines_in_chunks, file_total_lines,
"All file lines should appear across semantic chunks: expected {}, got {}",
file_total_lines, total_lines_in_chunks
);
}
#[test]
fn test_chunk_strategy_unsupported_language_falls_back() {
let lines: Vec<String> = (1..=20).map(|i| format!("plain text line {}", i)).collect();
let content = lines.join("\n") + "\n";
let file = create_streaming_test_file(&content, ".txt");
let path = file.path().to_str().unwrap();
let output = run_batless_cli(&[
"--streaming-json",
"--mode=json",
"--streaming-chunk-size=5",
"--chunk-strategy=semantic",
path,
]);
assert!(
output.status.success(),
"batless --chunk-strategy=semantic on a .txt file should succeed (fallback to line-based)"
);
let stdout = String::from_utf8(output.stdout).unwrap();
assert!(
!stdout.is_empty(),
"Output should not be empty even when falling back"
);
let chunks: Vec<serde_json::Value> = stdout
.lines()
.filter(|l| !l.trim().is_empty())
.map(|l| serde_json::from_str(l).expect("Each NDJSON line should be valid JSON"))
.collect();
assert!(!chunks.is_empty(), "Should produce at least one chunk");
let total_lines_in_chunks: usize = chunks
.iter()
.map(|c| c["lines"].as_array().map(|a| a.len()).unwrap_or(0))
.sum();
let file_total_lines = lines.len();
assert_eq!(
total_lines_in_chunks, file_total_lines,
"All text lines should appear across fallback chunks: expected {}, got {}",
file_total_lines, total_lines_in_chunks
);
}