// edgeparse_core/pipeline/parallel.rs
#[cfg(not(target_arch = "wasm32"))]
11use rayon::prelude::*;
12
13use crate::models::content::ContentElement;
14
15type PageContent = Vec<ContentElement>;
17
18pub fn par_map_pages<F>(pages: &mut Vec<PageContent>, op: F)
28where
29 F: Fn(Vec<ContentElement>) -> Vec<ContentElement> + Sync + Send,
30{
31 #[cfg(not(target_arch = "wasm32"))]
32 {
33 let results: Vec<PageContent> = std::mem::take(pages).into_par_iter().map(&op).collect();
34 *pages = results;
35 }
36 #[cfg(target_arch = "wasm32")]
37 {
38 let results: Vec<PageContent> = std::mem::take(pages).into_iter().map(op).collect();
39 *pages = results;
40 }
41}
42
43pub fn par_map_pages_indexed<F>(pages: &mut Vec<PageContent>, op: F)
46where
47 F: Fn(usize, Vec<ContentElement>) -> Vec<ContentElement> + Sync + Send,
48{
49 #[cfg(not(target_arch = "wasm32"))]
50 {
51 let results: Vec<PageContent> = std::mem::take(pages)
52 .into_par_iter()
53 .enumerate()
54 .map(|(i, page)| op(i, page))
55 .collect();
56 *pages = results;
57 }
58 #[cfg(target_arch = "wasm32")]
59 {
60 let results: Vec<PageContent> = std::mem::take(pages)
61 .into_iter()
62 .enumerate()
63 .map(|(i, page)| op(i, page))
64 .collect();
65 *pages = results;
66 }
67}
68
69pub fn par_extract<T, F>(pages: &[PageContent], op: F) -> Vec<T>
73where
74 T: Send,
75 F: Fn(&[ContentElement]) -> T + Sync + Send,
76{
77 #[cfg(not(target_arch = "wasm32"))]
78 {
79 pages.par_iter().map(|page| op(page)).collect()
80 }
81 #[cfg(target_arch = "wasm32")]
82 {
83 pages.iter().map(|page| op(page)).collect()
84 }
85}
86
87#[cfg(not(target_arch = "wasm32"))]
91pub fn configure_thread_pool(num_threads: usize) -> Result<(), rayon::ThreadPoolBuildError> {
92 rayon::ThreadPoolBuilder::new()
93 .num_threads(num_threads)
94 .build_global()
95}
96
/// `wasm32` stand-in for thread-pool configuration.
///
/// The requested thread count is ignored and no pool is created; the call
/// always succeeds. Note that the error type of this variant is `String`.
#[cfg(target_arch = "wasm32")]
pub fn configure_thread_pool(_num_threads: usize) -> Result<(), String> {
    // Nothing to configure on this target.
    Ok(())
}
104
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::bbox::BoundingBox;
    use crate::models::chunks::TextChunk;
    use crate::models::content::ContentElement;
    use crate::models::enums::{PdfLayer, TextFormat, TextType};

    /// Builds a text-chunk element whose text is `val`; all other fields are
    /// fixed filler values.
    fn text_chunk(val: &str) -> ContentElement {
        let chunk = TextChunk {
            value: val.to_owned(),
            bbox: BoundingBox::new(None, 0.0, 0.0, 100.0, 10.0),
            font_name: String::new(),
            font_size: 12.0,
            font_weight: 400.0,
            italic_angle: 0.0,
            font_color: String::new(),
            contrast_ratio: 21.0,
            symbol_ends: vec![],
            text_format: TextFormat::Normal,
            text_type: TextType::Regular,
            pdf_layer: PdfLayer::Main,
            ocg_visible: true,
            index: None,
            page_number: None,
            level: None,
            mcid: None,
        };
        ContentElement::TextChunk(chunk)
    }

    /// Number of elements on each page, in page order.
    fn page_lens(pages: &[PageContent]) -> Vec<usize> {
        pages.iter().map(Vec::len).collect()
    }

    /// An identity `op` must leave the page count and every page length intact.
    #[test]
    fn test_par_map_pages_identity() {
        let first = vec![text_chunk("a"), text_chunk("b")];
        let second = vec![text_chunk("c")];
        let mut pages = vec![first, second];

        par_map_pages(&mut pages, |page| page);

        assert_eq!(page_lens(&pages), vec![2, 1]);
    }

    /// Whatever `op` returns for a page must end up stored in that page's slot.
    #[test]
    fn test_par_map_pages_transform() {
        let mut pages = vec![
            vec![text_chunk("a"), text_chunk("b"), text_chunk("c")],
            vec![text_chunk("x")],
        ];

        par_map_pages(&mut pages, |mut page| {
            page.truncate(1);
            page
        });

        // The first two pages must each have been cut down to one element.
        assert!(pages[..2].iter().all(|page| page.len() == 1));
    }

    /// Every page index must be passed to `op` exactly once.
    #[test]
    fn test_par_map_pages_indexed() {
        let mut pages: Vec<PageContent> = ["a", "b", "c"]
            .iter()
            .map(|text| vec![text_chunk(text)])
            .collect();

        // The mutex lets the (possibly parallel) callback record indices safely.
        let visited = std::sync::Mutex::new(Vec::<usize>::new());
        par_map_pages_indexed(&mut pages, |index, page| {
            visited.lock().unwrap().push(index);
            page
        });

        // Visit order is not asserted, so sort before comparing.
        let mut visited = visited.into_inner().unwrap();
        visited.sort_unstable();
        assert_eq!(visited, [0, 1, 2]);
    }

    /// Extracted values must come back one per page, in page order.
    #[test]
    fn test_par_extract() {
        let pages = vec![
            vec![text_chunk("a"), text_chunk("b")],
            vec![text_chunk("c")],
            Vec::new(),
        ];

        let counts: Vec<usize> = par_extract(&pages, |page| page.len());

        assert_eq!(counts, [2, 1, 0]);
    }

    /// Both helpers must cope with a document that has no pages at all.
    #[test]
    fn test_empty_pages() {
        let mut pages: Vec<PageContent> = Vec::new();

        par_map_pages(&mut pages, |page| page);
        assert!(pages.is_empty());

        let counts: Vec<usize> = par_extract(&pages, |page| page.len());
        assert!(counts.is_empty());
    }
}