use koicore::parser::decode_buf_reader::DecodeBufReader;
use koicore::parser::input::EncodingErrorStrategy;
use std::io::{BufRead, Cursor};
/// Reads a three-line UTF-8 input (ASCII mixed with CJK text) one line at a
/// time, checks every decoded line exactly, and then checks that one more
/// read reports end of input without appending anything.
#[test]
fn test_decode_buf_reader_utf8() {
    let source = Cursor::new("Hello, 世界!\n测试数据\nLine 3".as_bytes());
    let mut decoder = DecodeBufReader::new(source);
    let mut line = String::new();

    // The final line carries no trailing newline and must still be returned.
    for expected in ["Hello, 世界!\n", "测试数据\n", "Line 3"] {
        line.clear();
        let bytes_read = decoder.read_line(&mut line).unwrap();
        assert!(bytes_read > 0);
        assert_eq!(line, expected);
    }

    // Input is exhausted: the read reports zero bytes and leaves `line` empty.
    line.clear();
    let bytes_read = decoder.read_line(&mut line).unwrap();
    assert_eq!(bytes_read, 0);
    assert_eq!(line, "");
}
/// Decodes a single chunk from a short UTF-8 input and checks that the text
/// taken from the decoder afterwards is non-empty and contains the start of
/// the input.
#[test]
fn test_decode_buf_reader_chunk_decoding() {
    let source = Cursor::new("Hello, 世界!\n测试数据\nLine 3".as_bytes());
    let mut decoder = DecodeBufReader::new(source);

    // NOTE(review): the meaning of the returned flag is defined by
    // `decode_chunk`, which is not visible here; this test only requires it
    // to be `true` for a non-empty input.
    let chunk_status = decoder.decode_chunk(100).unwrap();
    assert!(chunk_status);

    let decoded = decoder.take_string().unwrap();
    assert!(!decoded.is_empty());
    assert!(decoded.contains("Hello"));
}
/// Checks that reading a line from an empty input reports zero bytes and
/// leaves the destination string untouched.
#[test]
fn test_decode_buf_reader_empty_input() {
    let empty: &[u8] = &[];
    let mut decoder = DecodeBufReader::new(Cursor::new(empty));

    let mut line = String::new();
    let bytes_read = decoder.read_line(&mut line).unwrap();

    assert_eq!(bytes_read, 0);
    assert_eq!(line, "");
}
/// Decodes GBK-encoded input and checks that the first line read contains
/// the expected Chinese character.
#[test]
fn test_decode_buf_reader_with_encoding() {
    // GBK bytes for "你好", a line feed, then "世界".
    let gbk_bytes: Vec<u8> = vec![0xC4, 0xE3, 0xBA, 0xC3, 0x0A, 0xCA, 0xC0, 0xBD, 0xE7];
    let mut decoder = DecodeBufReader::with_encoding(Cursor::new(gbk_bytes), encoding_rs::GBK);

    let mut first_line = String::new();
    let bytes_read = decoder.read_line(&mut first_line).unwrap();

    assert!(bytes_read > 0);
    assert!(first_line.contains("好"));
}
/// Checks that, with the `Strict` error strategy, reading input that is not
/// valid UTF-8 returns an error instead of decoded text.
#[test]
fn test_decode_buf_reader_error_handling() {
    // Not valid UTF-8: 0xC4 opens a two-byte sequence but 0xE3 is not a
    // continuation byte, and 0xFF never appears in well-formed UTF-8.
    let invalid_utf8: Vec<u8> = vec![0xC4, 0xE3, 0xBA, 0xC3, 0x0A, 0xFF, 0xFF];
    let mut decoder = DecodeBufReader::with_encoding_and_strategy(
        Cursor::new(invalid_utf8),
        encoding_rs::UTF_8,
        EncodingErrorStrategy::Strict,
    );

    let mut line = String::new();
    let outcome = decoder.read_line(&mut line);
    assert!(outcome.is_err());
}
/// Streams 100,000 generated lines through `DecodeBufReader::read_line` and
/// checks that every line arrives, spot-checking the content of the first
/// 1000 lines.
///
/// The input is produced on demand by a local `Read` implementation, so the
/// whole stream is never held in memory at once.
#[test]
fn test_decode_buf_reader_ultra_long_stream() {
    // Line count quoted in the log message only; it is never generated.
    let simulated_line_count = 2_700_000_000u64;
    // Line count actually generated and verified by this test.
    let test_line_count = 100_000u64;
    println!("Simulating ultra-long stream processing (would represent {} lines for 256GB+)", simulated_line_count);

    /// Produces `total_lines` numbered lines, one generated line at a time.
    struct MassiveDataStreamSimulator {
        /// Index of the next line to generate.
        current_line: u64,
        /// Number of lines after which the stream reports end of input.
        total_lines: u64,
        /// Line text with a `{}` placeholder for the line number.
        line_template: String,
        /// Bytes of the most recently generated line.
        pending: Vec<u8>,
        /// Offset of the first byte of `pending` not yet handed to a caller.
        pending_pos: usize,
    }

    impl MassiveDataStreamSimulator {
        fn new(total_lines: u64) -> Self {
            Self {
                current_line: 0,
                total_lines,
                line_template: "This is a simulated line number {} with substantial content to represent realistic data size including Unicode characters: 你好世界 🚀🌟⭐ and additional text to make each line approximately 100 bytes long for accurate simulation\n".to_string(),
                pending: Vec::new(),
                pending_pos: 0,
            }
        }
    }

    impl std::io::Read for MassiveDataStreamSimulator {
        /// Copies as much of the current line as fits into `buf`.
        ///
        /// Whatever does not fit is kept and served by the following calls,
        /// so a caller with a buffer shorter than a line (or an empty one)
        /// never loses bytes. `Ok(0)` with a non-empty `buf` is returned only
        /// after every line has been handed out in full.
        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
            if self.pending_pos >= self.pending.len() {
                if self.current_line >= self.total_lines {
                    return Ok(0);
                }
                self.pending = self
                    .line_template
                    .replace("{}", &self.current_line.to_string())
                    .into_bytes();
                self.pending_pos = 0;
                self.current_line += 1;
            }

            let remaining = &self.pending[self.pending_pos..];
            let to_copy = remaining.len().min(buf.len());
            buf[..to_copy].copy_from_slice(&remaining[..to_copy]);
            self.pending_pos += to_copy;
            Ok(to_copy)
        }
    }

    let reader = MassiveDataStreamSimulator::new(test_line_count);
    let mut decoder = DecodeBufReader::new(reader);
    let mut line_count_read = 0u64;
    let mut line = String::new();
    let mut total_bytes_read = 0u64;

    while decoder.read_line(&mut line).unwrap() > 0 {
        line_count_read += 1;
        total_bytes_read += line.len() as u64;

        // Full content checks are limited to the first 1000 lines; later
        // lines are only counted.
        if line_count_read <= 1000 {
            assert!(line.starts_with("This is a simulated line number "));
            assert!(line.contains("你好世界"));
            assert!(line.contains("🚀🌟⭐"));
            assert!(line.ends_with("\n"));
        }

        if line_count_read % 20000 == 0 {
            println!("Processed {} lines, {} bytes total", line_count_read, total_bytes_read);
        }

        line.clear();
    }

    assert_eq!(line_count_read, test_line_count);
    println!("Successfully processed {} lines representing {} GB+ of simulated data",
        line_count_read,
        (total_bytes_read as f64 / (1024.0 * 1024.0 * 1024.0)) as u64);
}
/// Decodes a 100,002-byte ASCII input in repeated `decode_chunk(1000)` calls,
/// collecting the text taken after each call, and checks that text from all
/// three sections of the input was collected.
#[test]
fn test_decode_buf_reader_large_chunk_decoding() {
    let large_content = format!(
        "{}\n{}\n{}",
        "A".repeat(50000),
        "B".repeat(30000),
        "C".repeat(20000)
    );
    let mut decoder = DecodeBufReader::new(Cursor::new(large_content.as_bytes()));

    let mut total_content = String::new();
    let mut chunks_decoded = 0;
    loop {
        // Stop as soon as `decode_chunk` reports `false`; nothing is taken
        // from the decoder after that call.
        let keep_going = decoder.decode_chunk(1000).unwrap();
        if !keep_going {
            break;
        }
        if let Some(piece) = decoder.take_string() {
            total_content.push_str(&piece);
            chunks_decoded += 1;
        }
    }

    assert!(!total_content.is_empty());
    assert!(chunks_decoded > 0);
    for marker in ["AAAAA", "BBBBB", "CCCCC"] {
        assert!(total_content.contains(marker));
    }
}
/// Reads 300 lines made solely of multi-byte emoji and checks that every
/// line contains the first emoji of one of the three repeated patterns.
#[test]
fn test_decode_buf_reader_multibyte_boundaries() {
    // Three emoji-only lines, repeated 100 times.
    let unicode_content = "🚀🌟⭐\n🌍🌎🌏\n😀😃😄\n".repeat(100);
    let mut decoder = DecodeBufReader::new(Cursor::new(unicode_content.as_bytes()));

    let mut line = String::new();
    let mut lines_read = 0;
    loop {
        line.clear();
        if decoder.read_line(&mut line).unwrap() == 0 {
            break;
        }
        lines_read += 1;
        assert!(["🚀", "🌍", "😀"].iter().any(|&emoji| line.contains(emoji)));
    }

    assert_eq!(lines_read, 300);
}
/// Reads input that has invalid bytes between two valid UTF-8 lines using
/// the `Replace` error strategy, and checks that both valid lines are still
/// present in the decoded output.
#[test]
fn test_decode_buf_reader_mixed_encodings() {
    // Valid text, three bytes that cannot occur in UTF-8, then valid text.
    let mixed_data: Vec<u8> = [
        "Valid UTF-8 text\n".as_bytes(),
        &[0xFFu8, 0xFE, 0xFD][..],
        "\nMore valid text\n".as_bytes(),
    ]
    .concat();

    let mut decoder = DecodeBufReader::with_encoding_and_strategy(
        Cursor::new(mixed_data),
        encoding_rs::UTF_8,
        EncodingErrorStrategy::Replace,
    );

    // Concatenate every line the decoder yields until end of input.
    let mut content = String::new();
    let mut line = String::new();
    loop {
        line.clear();
        if decoder.read_line(&mut line).unwrap() == 0 {
            break;
        }
        content.push_str(&line);
    }

    assert!(content.contains("Valid UTF-8 text"));
    assert!(content.contains("More valid text"));
}
/// Reads two long lines whose lengths sit just under 8192 bytes and checks
/// that each is returned as its own line, followed by end of input.
#[test]
fn test_decode_buf_reader_buffer_boundary_conditions() {
    // NOTE(review): 8192 presumably mirrors the reader's internal buffer
    // size; that value is not visible in this file — confirm.
    let buffer_size = 8192;
    let boundary_data = format!(
        "{}\n{}",
        "X".repeat(buffer_size - 10),
        "Y".repeat(buffer_size - 5)
    );
    let mut decoder = DecodeBufReader::new(Cursor::new(boundary_data.as_bytes()));
    let mut line = String::new();

    // (expected first character, whether the trailing newline is asserted)
    for (prefix, check_newline) in [("X", true), ("Y", false)] {
        line.clear();
        let bytes_read = decoder.read_line(&mut line).unwrap();
        assert!(bytes_read > 0);
        assert!(line.starts_with(prefix));
        if check_newline {
            assert!(line.ends_with("\n"));
        }
    }

    line.clear();
    let bytes_read = decoder.read_line(&mut line).unwrap();
    assert_eq!(bytes_read, 0);
}
/// Checks that requesting a zero-sized chunk succeeds with a `true` result
/// and leaves no decoded text to take.
#[test]
fn test_decode_buf_reader_zero_sized_reads() {
    let mut decoder = DecodeBufReader::new(Cursor::new("Test data".as_bytes()));

    let chunk_status = decoder.decode_chunk(0).unwrap();
    assert!(chunk_status);

    let taken = decoder.take_string();
    assert!(taken.is_none());
}
/// Reads two short newline-terminated lines back to back and checks that
/// each call returns exactly one line, with nothing left for a third read.
#[test]
fn test_decode_buf_reader_consume_behavior() {
    let source = Cursor::new("Short line 1\nShort line 2\n".as_bytes());
    let mut decoder = DecodeBufReader::new(source);
    let mut line = String::new();

    for expected in ["Short line 1\n", "Short line 2\n"] {
        line.clear();
        let bytes_read = decoder.read_line(&mut line).unwrap();
        assert!(bytes_read > 0);
        assert_eq!(line, expected);
    }

    // The input ends with a newline, so the next read must yield nothing.
    line.clear();
    let bytes_read = decoder.read_line(&mut line).unwrap();
    assert_eq!(bytes_read, 0);
    assert_eq!(line, "");
}
/// Streams 100,000 generated lines (ASCII, CJK and emoji) through
/// `DecodeBufReader::read_line` and checks the content of every line as well
/// as the total line count.
#[test]
fn test_decode_buf_reader_extremely_large_stream() {
    let line_count = 100000;
    println!("Testing extremely large stream with {} lines...", line_count);

    /// Produces `total_lines` numbered lines, one generated line at a time.
    struct LargeDataStream {
        /// Index of the next line to generate.
        current_line: usize,
        /// Number of lines after which the stream reports end of input.
        total_lines: usize,
        /// Text of the most recently generated line.
        line_buffer: String,
        /// Offset of the first byte of `line_buffer` not yet handed out.
        offset: usize,
        /// Constant tail appended to every line; built once, not per read.
        filler: String,
    }

    impl LargeDataStream {
        fn new(total_lines: usize) -> Self {
            Self {
                current_line: 0,
                total_lines,
                line_buffer: String::new(),
                offset: 0,
                filler: "ABC".repeat(50),
            }
        }
    }

    impl std::io::Read for LargeDataStream {
        /// Copies as much of the current line as fits into `buf`.
        ///
        /// Whatever does not fit is kept and served by the following calls,
        /// so a caller with a buffer shorter than a line (or an empty one)
        /// never loses bytes. `Ok(0)` with a non-empty `buf` is returned only
        /// after every line has been handed out in full.
        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
            if self.offset >= self.line_buffer.len() {
                if self.current_line >= self.total_lines {
                    return Ok(0);
                }
                self.line_buffer = format!(
                    "Line number {} with substantial content including Unicode: 你好世界 🚀🌟⭐ and numbers: {}\n",
                    self.current_line,
                    self.filler
                );
                self.offset = 0;
                self.current_line += 1;
            }

            let remaining = &self.line_buffer.as_bytes()[self.offset..];
            let to_copy = remaining.len().min(buf.len());
            buf[..to_copy].copy_from_slice(&remaining[..to_copy]);
            self.offset += to_copy;
            Ok(to_copy)
        }
    }

    let reader = LargeDataStream::new(line_count);
    let mut decoder = DecodeBufReader::new(reader);
    let mut lines_read = 0;
    let mut line = String::new();
    // Accumulates `String::len`, i.e. UTF-8 bytes, despite the "characters"
    // wording of the log messages below.
    let mut total_chars_read = 0;

    while decoder.read_line(&mut line).unwrap() > 0 {
        lines_read += 1;
        total_chars_read += line.len();

        assert!(line.contains("Line number"));
        assert!(line.contains("你好世界"));
        assert!(line.contains("🚀🌟⭐"));
        assert!(line.ends_with('\n'));

        if lines_read % 10000 == 0 {
            println!("Processed {} lines, {} characters total", lines_read, total_chars_read);
        }

        line.clear();
    }

    assert_eq!(lines_read, line_count);
    assert!(total_chars_read > 0);
    println!("Successfully processed {} lines with {} total characters", lines_read, total_chars_read);
}
/// Streams 50,000 short generated lines through `DecodeBufReader::read_line`
/// from a source that never returns more than 1024 bytes per read, and
/// checks every line plus the total line count.
#[test]
fn test_decode_buf_reader_memory_efficient_processing() {
    let large_line_count = 50000;
    println!("Testing memory-efficient processing with {} lines...", large_line_count);

    /// Produces `total_lines` numbered lines, capping the size of each read.
    struct MemoryEfficientReader {
        /// Index of the next line to generate.
        current_line: usize,
        /// Number of lines after which the stream reports end of input.
        total_lines: usize,
        /// Upper bound on the number of bytes returned by a single `read`.
        max_buffer_size: usize,
        /// Bytes of the most recently generated line.
        pending: Vec<u8>,
        /// Offset of the first byte of `pending` not yet handed to a caller.
        pending_pos: usize,
    }

    impl MemoryEfficientReader {
        fn new(total_lines: usize) -> Self {
            Self {
                current_line: 0,
                total_lines,
                max_buffer_size: 1024,
                pending: Vec::new(),
                pending_pos: 0,
            }
        }
    }

    impl std::io::Read for MemoryEfficientReader {
        /// Copies as much of the current line as fits into `buf`, never
        /// more than `max_buffer_size` bytes.
        ///
        /// Whatever does not fit is kept and served by the following calls,
        /// so a caller with a buffer shorter than a line never loses bytes.
        /// `Ok(0)` is returned for an empty `buf` or after every line has
        /// been handed out in full.
        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
            let capacity = buf.len().min(self.max_buffer_size);
            if capacity == 0 {
                // Nothing can be delivered; do not consume a line for it.
                return Ok(0);
            }

            if self.pending_pos >= self.pending.len() {
                if self.current_line >= self.total_lines {
                    return Ok(0);
                }
                self.pending = format!("Memory test line {}\n", self.current_line).into_bytes();
                self.pending_pos = 0;
                self.current_line += 1;
            }

            let remaining = &self.pending[self.pending_pos..];
            let to_copy = remaining.len().min(capacity);
            buf[..to_copy].copy_from_slice(&remaining[..to_copy]);
            self.pending_pos += to_copy;
            Ok(to_copy)
        }
    }

    let reader = MemoryEfficientReader::new(large_line_count);
    let mut decoder = DecodeBufReader::new(reader);
    let mut lines_read = 0;
    // One line buffer is reused for the whole stream.
    let mut line = String::new();

    while decoder.read_line(&mut line).unwrap() > 0 {
        lines_read += 1;

        assert!(line.starts_with("Memory test line "));
        assert!(line.ends_with('\n'));

        line.clear();

        if lines_read % 10000 == 0 {
            println!("Memory-efficient processing: {} lines completed", lines_read);
        }
    }

    assert_eq!(lines_read, large_line_count);
    println!("Memory-efficient processing completed successfully with {} lines", lines_read);
}