#[cfg(not(target_arch = "wasm32"))]
use rayon::prelude::*;
use crate::models::content::ContentElement;
/// The content elements of one page; the helpers in this file take a list of
/// these, one entry per page.
type PageContent = Vec<ContentElement>;
/// Replaces every page in `pages` with the result of running `op` on it.
///
/// Each page is handed to `op` by value and whatever `op` returns takes its
/// place. On non-wasm targets the pages are processed through rayon's parallel
/// iterator; on `wasm32` the same mapping runs through a plain sequential
/// iterator.
///
/// The contents of `pages` are moved out before `op` is first called, so
/// `pages` is empty for the duration of the mapping and is only refilled once
/// every page has been processed.
pub fn par_map_pages<F>(pages: &mut Vec<PageContent>, op: F)
where
    F: Fn(Vec<ContentElement>) -> Vec<ContentElement> + Sync + Send,
{
    // Take ownership of the pages so each one can be passed to `op` by value.
    let taken = std::mem::take(pages);

    // Only the iterator source differs between targets; the mapping and the
    // final collection below are shared.
    #[cfg(not(target_arch = "wasm32"))]
    let source = taken.into_par_iter();
    #[cfg(target_arch = "wasm32")]
    let source = taken.into_iter();

    *pages = source.map(&op).collect();
}
/// Replaces every page in `pages` with the result of running `op` on it,
/// also passing `op` the page's zero-based position within `pages`.
///
/// Each page is handed to `op` by value together with its index, and the
/// returned vector takes the page's place. On non-wasm targets the pages are
/// processed through rayon's parallel iterator; on `wasm32` the same mapping
/// runs through a plain sequential iterator.
///
/// The contents of `pages` are moved out before `op` is first called, so
/// `pages` is empty for the duration of the mapping and is only refilled once
/// every page has been processed.
pub fn par_map_pages_indexed<F>(pages: &mut Vec<PageContent>, op: F)
where
    F: Fn(usize, Vec<ContentElement>) -> Vec<ContentElement> + Sync + Send,
{
    // Take ownership of the pages so each one can be passed to `op` by value.
    let taken = std::mem::take(pages);

    // Only the iterator source differs between targets; pairing with the
    // index happens here, the mapping and collection below are shared.
    #[cfg(not(target_arch = "wasm32"))]
    let indexed = taken.into_par_iter().enumerate();
    #[cfg(target_arch = "wasm32")]
    let indexed = taken.into_iter().enumerate();

    *pages = indexed.map(|(index, page)| op(index, page)).collect();
}
/// Computes one value of type `T` per page by calling `op` on a read-only
/// view of that page's elements, and returns the values as a vector.
///
/// `pages` is only borrowed and is not modified. On non-wasm targets the
/// pages are visited through rayon's parallel iterator; on `wasm32` they are
/// visited with a plain sequential iterator.
pub fn par_extract<T, F>(pages: &[PageContent], op: F) -> Vec<T>
where
    T: Send,
    F: Fn(&[ContentElement]) -> T + Sync + Send,
{
    // Only the iterator source differs between targets; the mapping and the
    // final collection below are shared.
    #[cfg(not(target_arch = "wasm32"))]
    let per_page = pages.par_iter();
    #[cfg(target_arch = "wasm32")]
    let per_page = pages.iter();

    per_page.map(|page| op(page.as_slice())).collect()
}
/// Builds rayon's global thread pool with the requested number of threads.
///
/// `num_threads` is forwarded unchanged to `rayon::ThreadPoolBuilder`, and the
/// result of `build_global` is returned as-is.
///
/// # Errors
///
/// Returns the `rayon::ThreadPoolBuildError` reported by `build_global`.
/// NOTE(review): per rayon's documentation this happens when the global pool
/// has already been initialized — confirm against the rayon version in use.
///
/// Note that the error type of this function differs between targets: the
/// `wasm32` variant returns `Result<(), String>`.
#[cfg(not(target_arch = "wasm32"))]
pub fn configure_thread_pool(num_threads: usize) -> Result<(), rayon::ThreadPoolBuildError> {
    let builder = rayon::ThreadPoolBuilder::new().num_threads(num_threads);
    builder.build_global()
}

/// `wasm32` stand-in for the thread-pool configuration function.
///
/// The argument is ignored and the call always succeeds. The helpers in this
/// file use sequential iterators on this target, so there is no pool to set
/// up.
///
/// Note that the error type of this function differs between targets: the
/// non-wasm variant returns `Result<(), rayon::ThreadPoolBuildError>`.
#[cfg(target_arch = "wasm32")]
pub fn configure_thread_pool(_num_threads: usize) -> Result<(), String> {
    // Nothing to configure on this target.
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::bbox::BoundingBox;
    use crate::models::chunks::TextChunk;
    use crate::models::content::ContentElement;
    use crate::models::enums::{PdfLayer, TextFormat, TextType};

    /// Builds a text-chunk element carrying `val`, with fixed placeholder
    /// values for every other field.
    fn text_chunk(val: &str) -> ContentElement {
        ContentElement::TextChunk(TextChunk {
            value: val.to_string(),
            bbox: BoundingBox::new(None, 0.0, 0.0, 100.0, 10.0),
            font_name: String::new(),
            font_size: 12.0,
            font_weight: 400.0,
            italic_angle: 0.0,
            font_color: String::new(),
            contrast_ratio: 21.0,
            symbol_ends: vec![],
            text_format: TextFormat::Normal,
            text_type: TextType::Regular,
            pdf_layer: PdfLayer::Main,
            ocg_visible: true,
            index: None,
            page_number: None,
            level: None,
            mcid: None,
        })
    }

    /// Builds a page holding `len` placeholder chunks. Page length is used
    /// as a cheap fingerprint so tests can tell pages apart without needing
    /// to compare elements.
    fn page_of_len(len: usize) -> PageContent {
        (0..len).map(|_| text_chunk("x")).collect()
    }

    /// Returns the length of every page, in page order.
    fn page_lens(pages: &[PageContent]) -> Vec<usize> {
        pages.iter().map(Vec::len).collect()
    }

    #[test]
    fn test_par_map_pages_identity() {
        let mut pages = vec![
            vec![text_chunk("a"), text_chunk("b")],
            vec![text_chunk("c")],
        ];
        par_map_pages(&mut pages, |elems| elems);
        assert_eq!(page_lens(&pages), vec![2, 1]);
    }

    #[test]
    fn test_par_map_pages_transform() {
        // Distinct input lengths produce distinct output lengths, so a result
        // landing in the wrong slot is detected.
        let mut pages = vec![page_of_len(3), page_of_len(1), page_of_len(2)];
        par_map_pages(&mut pages, |mut elems| {
            elems.pop();
            elems
        });
        assert_eq!(page_lens(&pages), vec![2, 0, 1]);
    }

    #[test]
    fn test_par_map_pages_preserves_order_many_pages() {
        // Enough pages that the parallel implementation has a chance to split
        // the work, which a two- or three-page input may never trigger.
        let lens: Vec<usize> = (0..64).map(|i| i % 5).collect();
        let mut pages: Vec<PageContent> = lens.iter().map(|&len| page_of_len(len)).collect();
        par_map_pages(&mut pages, |elems| elems);
        assert_eq!(page_lens(&pages), lens);
    }

    #[test]
    fn test_par_map_pages_indexed() {
        // Page `i` has exactly `i + 1` elements.
        let mut pages = vec![page_of_len(1), page_of_len(2), page_of_len(3)];
        let indices_seen = std::sync::Mutex::new(vec![]);
        par_map_pages_indexed(&mut pages, |i, mut elems| {
            indices_seen.lock().unwrap().push(i);
            // Blank the page if the index does not belong to it, so a wrong
            // index/page pairing shows up in the final lengths.
            if elems.len() != i + 1 {
                elems.clear();
            }
            elems
        });
        let mut seen = indices_seen.into_inner().unwrap();
        seen.sort();
        assert_eq!(seen, vec![0, 1, 2]);
        assert_eq!(page_lens(&pages), vec![1, 2, 3]);
    }

    #[test]
    fn test_par_map_pages_indexed_many_pages() {
        // Every input page is identical, so the output length depends only on
        // the index passed to `op`; this checks that the result for index `i`
        // ends up in slot `i`.
        let mut pages: Vec<PageContent> = (0..64).map(|_| page_of_len(2)).collect();
        par_map_pages_indexed(&mut pages, |i, mut elems| {
            elems.truncate(i % 3);
            elems
        });
        let expected: Vec<usize> = (0..64).map(|i| i % 3).collect();
        assert_eq!(page_lens(&pages), expected);
    }

    #[test]
    fn test_par_extract() {
        let pages = vec![
            vec![text_chunk("a"), text_chunk("b")],
            vec![text_chunk("c")],
            vec![],
        ];
        let counts: Vec<usize> = par_extract(&pages, |elems| elems.len());
        assert_eq!(counts, vec![2, 1, 0]);
    }

    #[test]
    fn test_par_extract_preserves_order_many_pages() {
        let lens: Vec<usize> = (0..64).map(|i| i % 5).collect();
        let pages: Vec<PageContent> = lens.iter().map(|&len| page_of_len(len)).collect();
        let counts: Vec<usize> = par_extract(&pages, |elems| elems.len());
        assert_eq!(counts, lens);
    }

    #[test]
    fn test_empty_pages() {
        let mut pages: Vec<PageContent> = vec![];
        par_map_pages(&mut pages, |e| e);
        assert!(pages.is_empty());
        par_map_pages_indexed(&mut pages, |_, e| e);
        assert!(pages.is_empty());
        let counts: Vec<usize> = par_extract(&pages, |e| e.len());
        assert!(counts.is_empty());
    }
}