use crate::TurtleParser;
use memchr::memchr;
use std::fs::File;
use std::io;
use std::io::{Read, Seek, SeekFrom};
/// Splits an in-memory line-based document into contiguous byte ranges that end on line
/// boundaries.
///
/// The returned `(start, end)` pairs are half-open byte ranges into `bytes`. They are
/// contiguous, start at `0` and together cover the whole slice. Every range except possibly the
/// last one ends right after a `\n` byte.
///
/// `n_chunks` is a target, not a guarantee: fewer ranges are returned when the input is short or
/// contains few newlines, and an empty input yields no range at all. A value of `0` is treated
/// like `1` instead of panicking on a division by zero.
pub fn get_ntriples_slice_chunks(bytes: &[u8], n_chunks: usize) -> Vec<(usize, usize)> {
    // Guard against a division by zero below: asking for no chunk still gets the whole input.
    let n_chunks = n_chunks.max(1);
    let total_len = bytes.len();
    let chunk_size = total_len / n_chunks;
    let mut offsets = Vec::with_capacity(n_chunks);
    let mut last_pos = 0;
    for _ in 0..n_chunks {
        // Jump roughly one chunk ahead, then extend the chunk up to the end of the current line.
        let search_pos = last_pos + chunk_size;
        if search_pos >= total_len {
            break;
        }
        let Some(pos) = next_newline_position(&bytes[search_pos..]) else {
            break;
        };
        let end_pos = search_pos + pos;
        offsets.push((last_pos, end_pos));
        last_pos = end_pos;
    }
    // Whatever is left (including a final line without trailing newline) forms the last chunk.
    if last_pos < total_len {
        offsets.push((last_pos, total_len));
    }
    offsets
}
/// Returns the offset of the byte following the first `\n` in `input`.
///
/// Yields `None` when `input` contains no newline byte.
fn next_newline_position(input: &[u8]) -> Option<usize> {
    memchr(b'\n', input).map(|newline_idx| newline_idx + 1)
}
/// Splits a line-based file into contiguous byte ranges that end on line boundaries.
///
/// The returned `(start, end)` pairs are half-open byte offsets into the file. They are
/// contiguous and start at `0`. Every range except possibly the last one ends right after a `\n`
/// byte; the last range ends at `file_size`.
///
/// `file_size` is the number of bytes to cover. `n_chunks` is a target, not a guarantee: fewer
/// ranges may be returned. A value of `0` is treated like `1` instead of panicking on a division
/// by zero.
///
/// The file cursor is moved by this function and is left at an unspecified position.
///
/// # Errors
///
/// Returns any error raised while seeking or reading, except `ErrorKind::Interrupted` reads,
/// which are retried. Also fails if a length does not fit in a `u64`.
pub fn get_ntriples_file_chunks(
    file: &mut File,
    file_size: u64,
    n_chunks: usize,
) -> io::Result<Vec<(u64, u64)>> {
    // Guard against a division by zero below: asking for no chunk still gets the whole file.
    let n_chunks = n_chunks.max(1);
    let chunk_size = file_size / u64::try_from(n_chunks).map_err(io::Error::other)?;
    let mut offsets = Vec::with_capacity(n_chunks);
    let mut buffer = [0; 4096];
    let mut last_pos = 0;
    for _ in 0..n_chunks {
        // Jump roughly one chunk ahead, then scan forward for the end of the current line.
        let search_pos = last_pos + chunk_size;
        if search_pos >= file_size {
            break;
        }
        file.seek(SeekFrom::Start(search_pos))?;
        // Absolute offset of the first byte of the buffer being scanned.
        let mut pos = search_pos;
        let line_end = loop {
            let read_len = match file.read(&mut buffer) {
                Ok(read_len) => read_len,
                // Interrupted reads are non-fatal and nothing has been consumed: just retry.
                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            if read_len == 0 {
                // End of file reached without meeting a newline.
                break None;
            }
            if let Some(after_newline) = next_newline_position(&buffer[..read_len]) {
                break Some(pos + u64::try_from(after_newline).map_err(io::Error::other)?);
            }
            pos += u64::try_from(read_len).map_err(io::Error::other)?;
        };
        let Some(end_pos) = line_end else {
            break;
        };
        offsets.push((last_pos, end_pos));
        last_pos = end_pos;
    }
    // Whatever is left (including a final line without trailing newline) forms the last chunk.
    if last_pos < file_size {
        offsets.push((last_pos, file_size));
    }
    Ok(offsets)
}
/// Splits an in-memory Turtle document into contiguous byte ranges.
///
/// The returned `(start, end)` pairs are half-open byte ranges into `bytes`. They are
/// contiguous, start at `0` and together cover the whole slice. Every range except possibly the
/// last one ends at a position reported by `next_terminating_char`, which is given `parser` to
/// validate candidate split points.
///
/// `n_chunks` is a target, not a guarantee: fewer ranges are returned when no suitable split
/// point is found, and an empty input yields no range at all. A value of `0` is treated like `1`
/// instead of panicking on a division by zero.
pub fn get_turtle_slice_chunks(
    bytes: &[u8],
    n_chunks: usize,
    parser: &TurtleParser,
) -> Vec<(usize, usize)> {
    // Guard against a division by zero below: asking for no chunk still gets the whole input.
    let n_chunks = n_chunks.max(1);
    let total_len = bytes.len();
    let chunk_size = total_len / n_chunks;
    let mut offsets = Vec::with_capacity(n_chunks);
    let mut last_pos = 0;
    for _ in 0..n_chunks {
        // Jump roughly one chunk ahead, then look for the next acceptable split point.
        let search_pos = last_pos + chunk_size;
        if search_pos >= total_len {
            break;
        }
        let Some(pos) = next_terminating_char(parser, &bytes[search_pos..]) else {
            break;
        };
        let end_pos = search_pos + pos;
        offsets.push((last_pos, end_pos));
        last_pos = end_pos;
    }
    // Whatever is left forms the last chunk.
    if last_pos < total_len {
        offsets.push((last_pos, total_len));
    }
    offsets
}
/// Looks for a `.` in `input` after which the rest of the input parses cleanly, and returns the
/// offset of the byte following that `.`.
///
/// A candidate `.` is accepted when a clone of `parser`, run on the bytes after it, yields three
/// items in a row without error. At most 1 000 candidates are tried. `None` is returned when no
/// candidate is accepted, when there is no `.` left, or when the next `.` is the last byte of the
/// remaining input.
fn next_terminating_char(parser: &TurtleParser, mut input: &[u8]) -> Option<usize> {
    /// Returns `true` only if the first three results produced for `input` all exist and are `Ok`.
    fn parses_three_items(parser: TurtleParser, input: &[u8]) -> bool {
        let mut results = parser.for_slice(input);
        (0..3).all(|_| matches!(results.next(), Some(Ok(_))))
    }

    // Number of bytes already dropped from the front of the original `input`.
    let mut consumed = 0;
    for _ in 0..1_000 {
        let after_dot = memchr(b'.', input)? + 1;
        if after_dot >= input.len() {
            // Nothing follows the dot, so there is no tail to validate.
            return None;
        }
        if parses_three_items(parser.clone(), &input[after_dot..]) {
            return Some(consumed + after_dot);
        }
        // Resume the search one byte past the rejected dot's successor, i.e. the byte directly
        // after a rejected `.` is skipped too.
        let skipped = after_dot + 1;
        input = &input[skipped..];
        consumed += skipped;
    }
    None
}