1use rat_text::upos_type;
5use std::ops::Range;
6use unicode_segmentation::UnicodeSegmentation;
7
/// Pieces of a single `#`-style heading line, as produced by
/// [`parse_md_header`].
///
/// Every string field is a slice of the text that was handed to the parser.
#[derive(Debug)]
pub struct MDHeader<'a> {
    /// Heading level; computed as the byte length of `tag`.
    pub header: u8,
    /// Spaces and tabs found in front of the first `#`.
    pub prefix: &'a str,
    /// The run of `#` characters that introduces the heading.
    pub tag: &'a str,
    /// Heading text following the tag, up to the end of the input.
    pub text: &'a str,
    /// Byte range of `text`, shifted by the `relocate` offset that was
    /// passed to the parser.
    pub text_byte: Range<usize>,
}
17
18pub fn parse_md_header(relocate: usize, txt: &str) -> Option<MDHeader<'_>> {
22 let mut mark_prefix_end = 0;
23 let mut mark_tag_start = 0;
24 let mut mark_tag_end = 0;
25 let mut mark_text_start = 0;
26
27 #[derive(Debug, PartialEq)]
28 enum It {
29 Leading,
30 Tag,
31 LeadingText,
32 Text,
33 End,
34 Fail,
35 }
36
37 let mut state = It::Leading;
38 for (idx, c) in txt.bytes().enumerate() {
39 if state == It::Leading {
40 if c == b' ' || c == b'\t' {
41 mark_prefix_end = idx + 1;
42 mark_tag_start = idx + 1;
43 mark_tag_end = idx + 1;
44 mark_text_start = idx + 1;
45 } else if c == b'#' {
46 mark_prefix_end = idx;
47 mark_tag_start = idx;
48 mark_tag_end = idx + 1;
49 mark_text_start = idx + 1;
50 state = It::Tag;
51 } else {
52 state = It::Fail;
53 break;
54 }
55 } else if state == It::Tag {
56 if c == b'#' {
57 mark_tag_end = idx;
58 mark_text_start = idx + 1;
59 } else {
60 mark_tag_end = idx;
61 mark_text_start = idx + 1;
62 state = It::LeadingText;
63 }
64 } else if state == It::LeadingText {
65 if c == b' ' || c == b'\t' {
66 mark_text_start = idx + 1;
67 } else {
69 mark_text_start = idx;
70 state = It::Text;
71 }
72 } else if state == It::Text {
73 state = It::End;
74 break;
75 }
76 }
77
78 if state == It::Fail {
79 return None;
80 }
81
82 Some(MDHeader {
83 header: (mark_tag_end - mark_tag_start) as u8,
84 prefix: &txt[..mark_prefix_end],
85 tag: &txt[mark_tag_start..mark_tag_end],
86 text: &txt[mark_text_start..],
87 text_byte: relocate + mark_text_start..relocate + txt.len(),
88 })
89}
90
/// Pieces of a link reference definition (`[tag]: link "title"`), as
/// produced by [`parse_md_link_ref`].
///
/// Every field is a slice of the text that was handed to the parser.
#[derive(Debug)]
pub struct MDLinkRef<'a> {
    /// Whitespace found in front of the opening `[`.
    pub prefix: &'a str,
    /// Text between `[` and `]`.
    pub tag: &'a str,
    /// The link target; for the `<...>` form the angle brackets are not
    /// included.
    pub link: &'a str,
    /// The title without its opening quote; empty when there is none.
    pub title: &'a str,
    /// Everything from the end of the title to the end of the input.
    pub suffix: &'a str,
}
100
101pub fn parse_md_link_ref(_relocate: usize, txt: &str) -> Option<MDLinkRef<'_>> {
105 let mut mark_prefix_end = 0;
106 let mut mark_tag_start = 0;
107 let mut mark_tag_end = 0;
108 let mut mark_link_start = 0;
109 let mut mark_link_end = 0;
110 let mut mark_title_start = 0;
111 let mut mark_title_end = 0;
112
113 #[derive(Debug, PartialEq)]
114 enum It {
115 Leading,
116 Tag,
117 AfterTag,
118 LeadingLink,
119 BracketLink,
120 Link,
121 LinkEsc,
122 LeadingTitle,
123 TitleSingle,
124 TitleSingleEsc,
125 TitleDouble,
126 TitleDoubleEsc,
127 End,
128 Fail,
129 }
130
131 let mut state = It::Leading;
132 for (idx, c) in txt.bytes().enumerate() {
133 if state == It::Leading {
134 if c == b'[' {
135 mark_prefix_end = idx;
136 mark_tag_start = idx + 1;
137 mark_tag_end = idx + 1;
138 mark_link_start = idx + 1;
139 mark_link_end = idx + 1;
140 mark_title_start = idx + 1;
141 mark_title_end = idx + 1;
142 state = It::Tag;
143 } else if c == b' ' || c == b'\t' || c == b'\n' || c == b'\r' {
144 mark_prefix_end = idx + 1;
145 mark_tag_start = idx + 1;
146 mark_tag_end = idx + 1;
147 mark_link_start = idx + 1;
148 mark_link_end = idx + 1;
149 mark_title_start = idx + 1;
150 mark_title_end = idx + 1;
151 } else {
152 state = It::Fail;
153 break;
154 }
155 } else if state == It::Tag {
156 if c == b']' {
157 mark_tag_end = idx;
158 mark_link_start = idx + 1;
159 mark_link_end = idx + 1;
160 mark_title_start = idx + 1;
161 mark_title_end = idx + 1;
162 state = It::AfterTag;
163 } else {
164 mark_tag_end = idx;
165 mark_link_start = idx + 1;
166 mark_link_end = idx + 1;
167 mark_title_start = idx + 1;
168 mark_title_end = idx + 1;
169 }
170 } else if state == It::AfterTag {
171 if c == b':' {
172 mark_link_start = idx + 1;
173 mark_link_end = idx + 1;
174 mark_title_start = idx + 1;
175 mark_title_end = idx + 1;
176 state = It::LeadingLink;
177 } else {
178 state = It::Fail;
179 break;
180 }
181 } else if state == It::LeadingLink {
182 if c == b' ' || c == b'\t' || c == b'\n' || c == b'\r' {
183 mark_link_start = idx + 1;
184 mark_link_end = idx + 1;
185 mark_title_start = idx + 1;
186 mark_title_end = idx + 1;
187 } else if c == b'<' {
189 mark_link_start = idx + 1;
190 mark_link_end = idx + 1;
191 mark_title_start = idx + 1;
192 mark_title_end = idx + 1;
193 state = It::BracketLink;
194 } else {
195 mark_link_start = idx;
196 mark_link_end = idx;
197 mark_title_start = idx;
198 mark_title_end = idx;
199 state = It::Link;
200 }
201 } else if state == It::BracketLink {
202 if c == b'>' {
203 mark_link_end = idx;
204 mark_title_start = idx + 1;
205 mark_title_end = idx + 1;
206 state = It::LeadingTitle;
207 } else {
208 mark_link_end = idx;
209 mark_title_start = idx;
210 mark_title_end = idx;
211 }
212 } else if state == It::Link {
213 if c == b'\\' {
214 mark_link_end = idx;
215 mark_title_start = idx;
216 mark_title_end = idx;
217 state = It::LinkEsc;
218 } else if c == b'\n' || c == b'\r' {
219 mark_link_end = idx;
220 mark_title_start = idx + 1;
221 mark_title_end = idx + 1;
222 state = It::LeadingTitle;
223 } else if c == b'\'' {
224 mark_link_end = idx;
225 mark_title_start = idx + 1;
226 mark_title_end = idx + 1;
227 state = It::TitleSingle;
228 } else if c == b'"' {
229 mark_link_end = idx;
230 mark_title_start = idx + 1;
231 mark_title_end = idx + 1;
232 state = It::TitleDouble;
233 } else {
234 mark_link_end = idx;
235 mark_title_start = idx;
236 mark_title_end = idx;
237 }
238 } else if state == It::LinkEsc {
239 mark_link_end = idx;
240 mark_title_start = idx;
241 mark_title_end = idx;
242 state = It::Link;
243 } else if state == It::LeadingTitle {
244 if c == b' ' || c == b'\t' || c == b'\n' || c == b'\r' {
245 mark_title_start = idx + 1;
246 mark_title_end = idx + 1;
247 } else if c == b'\'' {
248 mark_title_start = idx + 1;
249 mark_title_end = idx + 1;
250 state = It::TitleSingle;
251 } else if c == b'"' {
252 mark_title_start = idx + 1;
253 mark_title_end = idx + 1;
254 state = It::TitleDouble;
255 } else {
256 mark_title_start = idx;
258 mark_title_end = idx;
259 state = It::End;
260 break;
261 }
262 } else if state == It::TitleSingle {
263 if c == b'\'' {
264 mark_title_end = idx;
265 state = It::End;
266 break;
267 } else if c == b'\\' {
268 mark_title_end = idx;
269 state = It::TitleSingleEsc;
270 } else {
271 mark_title_end = idx;
272 }
273 } else if state == It::TitleSingleEsc {
274 mark_title_end = idx;
275 state = It::TitleSingle;
276 } else if state == It::TitleDouble {
277 if c == b'"' {
278 mark_title_end = idx;
279 state = It::End;
280 break;
281 } else if c == b'\\' {
282 mark_title_end = idx;
283 state = It::TitleDoubleEsc;
284 } else {
285 mark_title_end = idx;
286 }
287 } else if state == It::TitleDoubleEsc {
288 mark_title_end = idx;
289 state = It::TitleDouble;
290 }
291 }
292
293 if state == It::Fail {
294 return None;
295 }
296
297 Some(MDLinkRef {
298 prefix: &txt[..mark_prefix_end],
299 tag: &txt[mark_tag_start..mark_tag_end],
300 link: &txt[mark_link_start..mark_link_end],
301 title: &txt[mark_title_start..mark_title_end],
302 suffix: &txt[mark_title_end..],
303 })
304}
305
/// Pieces of a list item line (`- text`, `1. text`), as produced by
/// [`parse_md_item`].
///
/// String fields are slices of the parsed text; the byte ranges are shifted
/// by the `relocate` offset that was passed to the parser.
#[derive(Debug)]
pub struct MDItem<'a> {
    /// Everything in front of the list mark.
    pub prefix: &'a str,
    /// Byte range covering the mark together with its suffix.
    pub mark_bytes: Range<usize>,
    /// The mark itself: `+`, `-` or `*` for bullets, the digits for ordered
    /// items.
    pub mark: &'a str,
    /// Terminator of an ordered mark (`.` or `)`); empty for bullets.
    pub mark_suffix: &'a str,
    /// Parsed number of an ordered item; `None` for bullets.
    pub mark_nr: Option<usize>,
    /// Text between the mark (incl. suffix) and the start of `text`.
    pub text_prefix: &'a str,
    /// Byte range of `text`.
    pub text_bytes: Range<usize>,
    /// Item text up to the end of the input.
    pub text: &'a str,
}
318
319pub fn parse_md_item(relocate: usize, txt: &str) -> Option<MDItem<'_>> {
321 let mut mark_byte = 0;
322 let mut mark_suffix_byte = 0;
323 let mut text_prefix_byte = 0;
324 let mut text_byte = 0;
325
326 let mut mark_nr = None;
327
328 #[derive(Debug, PartialEq)]
329 enum It {
330 Leading,
331 OrderedMark,
332 TextLeading,
333 Fail,
334 End,
335 }
336
337 let mut state = It::Leading;
338 for (idx, c) in txt.bytes().enumerate() {
339 if state == It::Leading {
340 if c == b'+' || c == b'-' || c == b'*' {
341 mark_byte = idx;
342 mark_suffix_byte = idx + 1;
343 text_prefix_byte = idx + 1;
344 text_byte = idx + 1;
345 state = It::TextLeading;
346 } else if c.is_ascii_digit() {
347 mark_byte = idx;
348 state = It::OrderedMark;
349 } else if c == b' ' || c == b'\t' {
350 } else {
352 state = It::Fail;
353 break;
354 }
355 } else if state == It::OrderedMark {
356 if c.is_ascii_digit() {
357 } else if c == b'.' || c == b')' {
359 mark_suffix_byte = idx;
360 text_prefix_byte = idx + 1;
361 text_byte = idx + 1;
362 mark_nr = Some(
363 txt[mark_byte..mark_suffix_byte]
364 .parse::<usize>()
365 .expect("nr"),
366 );
367 state = It::TextLeading;
368 } else {
369 state = It::Fail;
370 break;
371 }
372 } else if state == It::TextLeading {
373 if c == b' ' || c == b'\t' {
374 } else {
376 text_byte = idx;
377 state = It::End;
378 break;
379 }
380 }
381 }
382
383 if state == It::Fail {
384 return None;
385 }
386
387 Some(MDItem {
388 prefix: &txt[0..mark_byte],
389 mark_bytes: relocate + mark_byte..relocate + text_prefix_byte,
390 mark: &txt[mark_byte..mark_suffix_byte],
391 mark_suffix: &txt[mark_suffix_byte..text_prefix_byte],
392 mark_nr,
393 text_prefix: &txt[text_prefix_byte..text_byte],
394 text_bytes: relocate + text_byte..relocate + txt.len(),
395 text: &txt[text_byte..],
396 })
397}
398
399#[derive(Debug)]
401pub struct MDCell<'a> {
402 pub txt: &'a str,
403 pub txt_graphemes: Range<upos_type>,
404 pub txt_bytes: Range<usize>,
405}
406
407#[derive(Debug)]
409pub struct MDRow<'a> {
410 pub row: Vec<MDCell<'a>>,
411 pub cursor_cell: usize,
413 pub cursor_offset: upos_type,
415 pub cursor_byte_offset: usize,
417}
418
419pub fn parse_md_row(relocate: usize, txt: &str, x: upos_type) -> MDRow<'_> {
422 let mut tmp = MDRow {
423 row: Default::default(),
424 cursor_cell: 0,
425 cursor_offset: 0,
426 cursor_byte_offset: 0,
427 };
428
429 let mut grapheme_start = 0;
430 let mut grapheme_last = 0;
431 let mut esc = false;
432 let mut cell_offset = 0;
433 let mut cell_byte_start = 0;
434 for (idx, (byte_idx, c)) in txt.grapheme_indices(true).enumerate() {
435 if idx == x as usize {
436 tmp.cursor_cell = tmp.row.len();
437 tmp.cursor_offset = cell_offset;
438 tmp.cursor_byte_offset = byte_idx - cell_byte_start;
439 }
440
441 if c == "\\" {
442 cell_offset += 1;
443 esc = true;
444 } else if c == "|" && !esc {
445 cell_offset = 0;
446 tmp.row.push(MDCell {
447 txt: &txt[cell_byte_start..byte_idx],
448 txt_graphemes: grapheme_start..idx as upos_type,
449 txt_bytes: relocate + cell_byte_start..relocate + byte_idx,
450 });
451 cell_byte_start = byte_idx + 1;
452 grapheme_start = idx as upos_type + 1;
453 } else {
454 cell_offset += 1;
455 esc = false;
456 }
457
458 grapheme_last = idx as upos_type;
459 }
460
461 tmp.row.push(MDCell {
462 txt: &txt[cell_byte_start..txt.len()],
463 txt_graphemes: grapheme_start..grapheme_last,
464 txt_bytes: relocate + cell_byte_start..relocate + txt.len(),
465 });
466
467 tmp
468}
469
/// Pieces of a block quote line (`> text`), as produced by
/// [`parse_md_block_quote`].
#[derive(Debug)]
pub struct MDBlockQuote<'a> {
    /// The quote marker.
    pub quote: &'a str,
    /// Text between the quote marker and the start of `text`.
    pub text_prefix: &'a str,
    /// Byte range of `text`, shifted by the `relocate` offset that was
    /// passed to the parser.
    pub text_bytes: Range<usize>,
    /// Quoted text up to the end of the input.
    pub text: &'a str,
}
478
479pub fn parse_md_block_quote(relocate: usize, txt: &str) -> Option<MDBlockQuote<'_>> {
483 let mut quote_byte = 0;
484 let mut text_prefix_byte = 0;
485 let mut text_byte = 0;
486
487 #[derive(Debug, PartialEq)]
488 enum It {
489 Leading,
490 TextLeading,
491 Text,
492 End,
493 Fail,
494 }
495
496 let mut state = It::Leading;
497 for (idx, c) in txt.bytes().enumerate() {
498 if state == It::Leading {
499 if c == b'>' {
500 quote_byte = idx;
501 text_prefix_byte = idx + 1;
502 state = It::TextLeading;
503 } else if c == b' ' || c == b'\t' {
504 } else {
506 state = It::Fail;
507 break;
508 }
509 } else if state == It::TextLeading {
510 if c == b' ' || c == b'\t' {
511 } else {
513 text_byte = idx;
514 state = It::Text;
515 }
516 } else if state == It::Text {
517 state = It::End;
518 break;
519 }
520 }
521
522 if state == It::Fail {
523 return None;
524 }
525
526 Some(MDBlockQuote {
527 quote: &txt[quote_byte..quote_byte + 1],
528 text_prefix: &txt[text_prefix_byte..text_byte],
529 text_bytes: relocate + text_byte..relocate + txt.len(),
530 text: &txt[text_byte..txt.len()],
531 })
532}