1use std::sync::RwLock;
2
3use getset::{CopyGetters, Getters, MutGetters, Setters};
4use rayon::{iter::{IntoParallelIterator, ParallelIterator}, slice::ParallelSliceMut};
5use crate::{codex::Codex, dossier::document::chapter::{chapter_header::ChapterHeader, paragraph::Paragraph}, load::{LoadConfiguration, LoadConfigurationOverLay, LoadError}};
6
7
8
9
10
11#[derive(Debug, Getters, CopyGetters, MutGetters, Setters)]
12pub struct LoadBlock {
13
14 #[getset(get_copy = "pub", set = "pub")]
15 start: usize,
16
17 #[getset(get_copy = "pub", set = "pub")]
18 end: usize,
19
20 #[getset(get = "pub", get_mut = "pub", set = "pub")]
21 content: LoadBlockContent
22}
23
24impl LoadBlock {
25 pub fn new(start: usize, end: usize, content: LoadBlockContent) -> Self {
26 Self {
27 start,
28 end,
29 content,
30 }
31 }
32
33
34
35
36 pub fn load_from_str(content: &str, codex: &Codex, configuration: &LoadConfiguration, configuration_overlay: LoadConfigurationOverLay) -> Result<Vec<LoadBlock>, LoadError> {
43 Self::inner_load_from_str(content, 0, codex, 0, configuration, configuration_overlay.clone())
44 }
45
46 fn inner_load_from_str(current_content: &str, offset: usize, codex: &Codex, paragraph_modifier_index: usize, configuration: &LoadConfiguration, configuration_overlay: LoadConfigurationOverLay) -> Result<Vec<LoadBlock>, LoadError> {
50
51 let position_in_global_content = |position_in_current: usize| position_in_current + offset;
52
53 if let Some((modifier_identifier, (paragraph_modifier, paragraph_loading_rule))) = codex.paragraph_modifiers().get_index(paragraph_modifier_index) {
54
55 log::debug!("load using {}", modifier_identifier);
56
57 let mut current_paragraph_blocks: Vec<LoadBlock> = Vec::new();
58
59 let mut unmatched_slices: Vec<(usize, &str)> = Vec::new();
60 let mut last_position: usize = 0;
61
62 for m in paragraph_modifier.modifier_pattern_regex().find_iter(current_content) {
64
65 assert!(!m.is_empty());
66
67 let m_start = m.start();
68 let m_end = m.end();
69
70 log::debug!("match found between {} and {}", m_start, m_end);
71
72 if m_start > last_position {
74 unmatched_slices.push((position_in_global_content(last_position), ¤t_content[last_position..m_start]));
75 }
76
77 last_position = m_end;
78
79 let paragraph = paragraph_loading_rule.load(m.as_str(), codex, configuration, configuration_overlay.clone())?;
80
81 if !paragraph.is_empty() {
82
83 let block = LoadBlock::new(
84 position_in_global_content(m_start),
85 position_in_global_content(m_end),
86 LoadBlockContent::Paragraph(paragraph)
87 );
88
89 log::debug!("added block:\n{:#?}", block);
90
91 current_paragraph_blocks.push(block);
92 }
93 }
94
95 if current_content.len() > last_position {
97 unmatched_slices.push((position_in_global_content(last_position), ¤t_content[last_position..]));
98 }
99
100
101 if configuration.parallelization() {
103
104 let unmatched_slices_blocks: RwLock<Vec<LoadBlock>> = RwLock::new(Vec::new());
105
106 let errors: Vec<LoadError> = unmatched_slices.into_par_iter().map(|(offset, unmatched_slice)| -> Result<(), LoadError> {
107
108 log::debug!("no matches using paragraph modifier {} on:\n{}\n(offset: {})", modifier_identifier, unmatched_slice, offset);
109
110 let mut blocks = Self::inner_load_from_str(unmatched_slice, offset, codex, paragraph_modifier_index + 1, configuration, configuration_overlay.clone())?;
111
112 unmatched_slices_blocks.write().unwrap().append(&mut blocks);
113
114 Ok(())
115 })
116 .filter(|result| result.is_err())
117 .map(|result| result.err().unwrap())
118 .collect();
119
120 if errors.len() > 0 {
121 return Err(LoadError::BucketOfErrors(errors))
122 }
123
124 let mut unmatched_slices_blocks = unmatched_slices_blocks.into_inner().unwrap();
125
126 current_paragraph_blocks.append(&mut unmatched_slices_blocks);
127
128 } else {
129
130 let mut unmatched_slices_blocks: Vec<LoadBlock> = Vec::new();
131
132 for (offset, unmatched_slice) in unmatched_slices {
133
134 log::debug!("try next paragraph modifier on:\n{}\n(offset: {})", unmatched_slice, offset);
135
136 let mut blocks = Self::inner_load_from_str(unmatched_slice, offset, codex, paragraph_modifier_index + 1, configuration, configuration_overlay.clone())?;
137
138 unmatched_slices_blocks.append(&mut blocks);
139 }
140
141 current_paragraph_blocks.append(&mut unmatched_slices_blocks);
142 }
143
144 return Ok(current_paragraph_blocks)
145
146 } else { log::debug!("next content contains headings and/or fallback paragraph:\n{}", current_content);
149
150 if codex.fallback_paragraph().is_none() {
151
152 log::warn!("there isn't fallback paragraph loading rule")
153 }
154
155 let mut headers_blocks = ChapterHeader::load(current_content, codex, configuration)?;
157
158 headers_blocks.par_sort_by(|a, b| a.start().cmp(&b.start()));
159
160 let mut blocks: Vec<LoadBlock> = Vec::new();
161
162 let mut add_fb_blocks = |raw_fb_paragraph: &str, start: usize, end: usize| -> Result<(), LoadError> {
163
164 if let Some((fb_id, fallback_loading_rule)) = codex.fallback_paragraph() {
165
166 log::debug!("fallback rule {} will be used to load:\n{}", fb_id, raw_fb_paragraph);
167
168 let paragraphs = fallback_loading_rule.load(raw_fb_paragraph, codex, configuration, configuration_overlay.clone())?;
169
170 let len = paragraphs.len();
171 assert!((end - start) > len);
172
173 for (index, paragraph) in paragraphs.into_iter().enumerate() {
174
175 let fake_start = start + ((end - start) / len * index);
176 let fake_end = start + ((end - start) / len * (index + 1));
177
178 let block = LoadBlock::new(
179 fake_start,
180 fake_end,
181 LoadBlockContent::Paragraph(paragraph)
182 );
183
184 log::debug!("generated fallback blocks:\n{:#?}", block);
185
186 blocks.push(block);
187 }
188 }
189
190 Ok(())
191 };
192
193 let mut last_position = 0;
194
195 for header_block in headers_blocks.iter_mut() {
197
198 if header_block.start() > last_position {
199
200 let start = last_position;
201 let global_start = position_in_global_content(last_position);
202 let end = header_block.start();
203 let global_end = position_in_global_content(header_block.start());
204
205 let s = ¤t_content[start..end];
206
207 log::debug!("found not header slice between {} (global pos: {}) and {} (global pos: {}) of current content:\n{}", start, global_start, end, global_end, s);
208
209 add_fb_blocks(
210 s,
211 global_start,
212 global_end
213 )?;
214 }
215
216 last_position = header_block.end();
217
218 header_block.set_start(position_in_global_content(header_block.start()));
219 header_block.set_end(position_in_global_content(header_block.end()));
220 }
221
222 log::debug!("last heading found at position (of current content): {}/{}", last_position, current_content.len());
223
224 if current_content.len() > last_position {
225
226 let s = ¤t_content[last_position..];
227
228 add_fb_blocks(
229 s,
230 position_in_global_content(last_position),
231 position_in_global_content(current_content.len())
232 )?;
233 }
234
235 blocks.append(&mut headers_blocks);
236
237 return Ok(blocks);
238 }
239 }
240
241
242}
243
244impl Into<LoadBlockContent> for LoadBlock {
245 fn into(self) -> LoadBlockContent {
246 self.content
247 }
248}
249
250impl TryInto<Box<dyn Paragraph>> for LoadBlock {
251 type Error = String;
252
253 fn try_into(self) -> Result<Box<dyn Paragraph>, Self::Error> {
254 if let LoadBlockContent::Paragraph(p) = self.content {
255 return Ok(p)
256 }
257
258 Err(String::from("this block doesn't contain a paragraph"))
259 }
260}
261
262#[derive(Debug)]
263pub enum LoadBlockContent {
264 Paragraph(Box<dyn Paragraph>),
265 ChapterHeader(ChapterHeader)
266}
267
268
269
#[cfg(test)]
mod test {

    use super::*;

    /// Three chunks of text separated by blank lines must be loaded as three blocks.
    #[test]
    fn paragraphs_from_str() {
        let content = "paragraph1\n\nparagraph2a\nparagraph2b\n\nparagraph3";

        let codex = Codex::of_html();
        let configuration = LoadConfiguration::default();
        let configuration_overlay = LoadConfigurationOverLay::default();

        let loaded_blocks = LoadBlock::load_from_str(content, &codex, &configuration, configuration_overlay).unwrap();

        assert_eq!(loaded_blocks.len(), 3);
    }
}