nmd_core/
load_block.rs

1use std::sync::RwLock;
2
3use getset::{CopyGetters, Getters, MutGetters, Setters};
4use rayon::{iter::{IntoParallelIterator, ParallelIterator}, slice::ParallelSliceMut};
5use crate::{codex::Codex, dossier::document::chapter::{chapter_header::ChapterHeader, paragraph::Paragraph}, load::{LoadConfiguration, LoadConfigurationOverLay, LoadError}};
6
7
8
9
10
11#[derive(Debug, Getters, CopyGetters, MutGetters, Setters)]
12pub struct LoadBlock {
13
14    #[getset(get_copy = "pub", set = "pub")]
15    start: usize,
16
17    #[getset(get_copy = "pub", set = "pub")]
18    end: usize,
19
20    #[getset(get = "pub", get_mut = "pub", set = "pub")]
21    content: LoadBlockContent
22}
23
24impl LoadBlock {
25    pub fn new(start: usize, end: usize, content: LoadBlockContent) -> Self {
26        Self {
27            start,
28            end,
29            content,
30        }
31    }
32
33
34    
35
36    /// Load content from `&str` based on `Codex`
37    /// 
38    /// Blocks are not sorted, sort if you want:
39    /// 
40    /// `blocks.par_sort_by(|a, b| a.start().cmp(&b.start()));``
41    /// 
42    pub fn load_from_str(content: &str, codex: &Codex, configuration: &LoadConfiguration, configuration_overlay: LoadConfigurationOverLay) -> Result<Vec<LoadBlock>, LoadError> {
43        Self::inner_load_from_str(content, 0, codex, 0, configuration, configuration_overlay.clone())
44    }
45
46    /// Inner load method to load content from `&str` based on `Codex`
47    /// 
48    /// This method uses recursive algorithm, use `content_offset=0` and `paragraph_modifier_index=0` to start.
49    fn inner_load_from_str(current_content: &str, offset: usize, codex: &Codex, paragraph_modifier_index: usize, configuration: &LoadConfiguration, configuration_overlay: LoadConfigurationOverLay) -> Result<Vec<LoadBlock>, LoadError> {
50
51        let position_in_global_content = |position_in_current: usize| position_in_current + offset;
52
53        if let Some((modifier_identifier, (paragraph_modifier, paragraph_loading_rule))) = codex.paragraph_modifiers().get_index(paragraph_modifier_index) {
54
55            log::debug!("load using {}", modifier_identifier);
56
57            let mut current_paragraph_blocks: Vec<LoadBlock> = Vec::new();
58
59            let mut unmatched_slices: Vec<(usize, &str)> = Vec::new();
60            let mut last_position: usize = 0;
61
62            // elaborate content based on current paragraph modifier
63            for m in paragraph_modifier.modifier_pattern_regex().find_iter(current_content) {
64
65                assert!(!m.is_empty());
66
67                let m_start = m.start();
68                let m_end = m.end();
69
70                log::debug!("match found between {} and {}", m_start, m_end);
71
72                // save previous slice, it will be loaded after
73                if m_start > last_position {
74                    unmatched_slices.push((position_in_global_content(last_position), &current_content[last_position..m_start]));
75                }
76
77                last_position = m_end;
78
79                let paragraph = paragraph_loading_rule.load(m.as_str(), codex, configuration, configuration_overlay.clone())?;
80
81                if !paragraph.is_empty() {
82
83                    let block = LoadBlock::new(
84                        position_in_global_content(m_start),
85                        position_in_global_content(m_end),
86                        LoadBlockContent::Paragraph(paragraph)
87                    );
88
89                    log::debug!("added block:\n{:#?}", block);
90
91                    current_paragraph_blocks.push(block);
92                }
93            }
94
95            // take last slice (if exists)
96            if current_content.len() > last_position {
97                unmatched_slices.push((position_in_global_content(last_position), &current_content[last_position..]));
98            }
99
100
101            // load unmatched slices
102            if configuration.parallelization() {
103
104                let unmatched_slices_blocks: RwLock<Vec<LoadBlock>> = RwLock::new(Vec::new());
105
106                let errors: Vec<LoadError> = unmatched_slices.into_par_iter().map(|(offset, unmatched_slice)| -> Result<(), LoadError> {
107
108                    log::debug!("no matches using paragraph modifier {} on:\n{}\n(offset: {})", modifier_identifier, unmatched_slice, offset);
109    
110                    let mut blocks = Self::inner_load_from_str(unmatched_slice, offset, codex, paragraph_modifier_index + 1, configuration, configuration_overlay.clone())?;
111                
112                    unmatched_slices_blocks.write().unwrap().append(&mut blocks);
113
114                    Ok(())
115                })
116                .filter(|result| result.is_err())
117                .map(|result| result.err().unwrap())
118                .collect();
119
120                if errors.len() > 0 {
121                    return Err(LoadError::BucketOfErrors(errors))
122                }
123
124                let mut unmatched_slices_blocks = unmatched_slices_blocks.into_inner().unwrap();
125
126                current_paragraph_blocks.append(&mut unmatched_slices_blocks);
127
128            } else {
129
130                let mut unmatched_slices_blocks: Vec<LoadBlock> = Vec::new();
131
132                for (offset, unmatched_slice) in unmatched_slices {
133    
134                    log::debug!("try next paragraph modifier on:\n{}\n(offset: {})", unmatched_slice, offset);
135    
136                    let mut blocks = Self::inner_load_from_str(unmatched_slice, offset, codex, paragraph_modifier_index + 1, configuration, configuration_overlay.clone())?;
137                
138                    unmatched_slices_blocks.append(&mut blocks);
139                }
140
141                current_paragraph_blocks.append(&mut unmatched_slices_blocks);
142            }
143
144            return Ok(current_paragraph_blocks)
145
146        } else {    // => there are no other modifiers
147
148            log::debug!("next content contains headings and/or fallback paragraph:\n{}", current_content);
149
150            if codex.fallback_paragraph().is_none()  {
151
152                log::warn!("there isn't fallback paragraph loading rule")
153            }
154
155            // load headings
156            let mut headers_blocks = ChapterHeader::load(current_content, codex, configuration)?;
157
158            headers_blocks.par_sort_by(|a, b| a.start().cmp(&b.start()));
159
160            let mut blocks: Vec<LoadBlock> = Vec::new();
161
162            let mut add_fb_blocks = |raw_fb_paragraph: &str, start: usize, end: usize| -> Result<(), LoadError> {
163
164                if let Some((fb_id, fallback_loading_rule)) = codex.fallback_paragraph() {
165
166                    log::debug!("fallback rule {} will be used to load:\n{}", fb_id, raw_fb_paragraph);
167
168                    let paragraphs = fallback_loading_rule.load(raw_fb_paragraph, codex, configuration, configuration_overlay.clone())?;
169
170                    let len = paragraphs.len();
171                    assert!((end - start) > len);
172
173                    for (index, paragraph) in paragraphs.into_iter().enumerate() {
174
175                        let fake_start = start + ((end - start) / len * index); 
176                        let fake_end = start + ((end - start) / len * (index + 1)); 
177
178                        let block = LoadBlock::new(
179                            fake_start,
180                            fake_end,
181                            LoadBlockContent::Paragraph(paragraph)
182                        );
183
184                        log::debug!("generated fallback blocks:\n{:#?}", block);
185    
186                        blocks.push(block);
187                    }
188                }
189
190                Ok(())
191            };
192
193            let mut last_position = 0;
194
195            // assign fallback paragraph
196            for header_block in headers_blocks.iter_mut() {
197
198                if header_block.start() > last_position {
199
200                    let start = last_position;
201                    let global_start = position_in_global_content(last_position);
202                    let end = header_block.start();
203                    let global_end = position_in_global_content(header_block.start());
204
205                    let s = &current_content[start..end];
206
207                    log::debug!("found not header slice between {} (global pos: {}) and {} (global pos: {}) of current content:\n{}", start, global_start, end, global_end, s);
208
209                    add_fb_blocks(
210                        s,
211                        global_start,
212                        global_end
213                    )?;
214                }
215
216                last_position = header_block.end();
217
218                header_block.set_start(position_in_global_content(header_block.start()));
219                header_block.set_end(position_in_global_content(header_block.end()));
220            }
221
222            log::debug!("last heading found at position (of current content): {}/{}", last_position, current_content.len());
223
224            if current_content.len() > last_position {
225
226                let s = &current_content[last_position..];
227
228                add_fb_blocks(
229                    s,
230                    position_in_global_content(last_position),
231                    position_in_global_content(current_content.len())
232                )?;
233            }
234
235            blocks.append(&mut headers_blocks);
236
237            return Ok(blocks);
238        }
239    }
240
241    
242}
243
244impl Into<LoadBlockContent> for LoadBlock {
245    fn into(self) -> LoadBlockContent {
246        self.content
247    }
248}
249
250impl TryInto<Box<dyn Paragraph>> for LoadBlock {
251    type Error = String;
252
253    fn try_into(self) -> Result<Box<dyn Paragraph>, Self::Error> {
254        if let LoadBlockContent::Paragraph(p) = self.content {
255            return Ok(p)
256        }
257
258        Err(String::from("this block doesn't contain a paragraph"))
259    }
260}
261
262#[derive(Debug)]
263pub enum LoadBlockContent {
264    Paragraph(Box<dyn Paragraph>),
265    ChapterHeader(ChapterHeader)
266}
267
268
269
#[cfg(test)]
mod test {

    use super::*;

    /// Three chunks separated by blank lines must be loaded as three blocks.
    #[test]
    fn paragraphs_from_str() {
        let source = "paragraph1\n\nparagraph2a\nparagraph2b\n\nparagraph3";

        let html_codex = Codex::of_html();
        let load_configuration = LoadConfiguration::default();

        let loaded_blocks = LoadBlock::load_from_str(
            source,
            &html_codex,
            &load_configuration,
            LoadConfigurationOverLay::default(),
        )
        .unwrap();

        assert_eq!(loaded_blocks.len(), 3);
    }
}