1use crate::error::Result;
10use crate::model::{
11 Align, Block, BlockImage, Cell, ColSpec, Column, Columns, Document, ImageSource, List, ListItem,
12 ListKind, Table, TableStyle,
13};
14
15mod attrs;
16mod inline;
17
18pub(crate) use attrs::{parse_attrs, Attr};
19
20pub fn parse(src: &str) -> Result<Document> {
22 let lines: Vec<String> = src.lines().map(|l| l.to_string()).collect();
23 Ok(Document { blocks: parse_blocks(&lines) })
24}
25
/// Returns the byte length of the leading whitespace of `s`.
///
/// A string made only of whitespace yields its full length.
fn indent_of(s: &str) -> usize {
    s.find(|ch: char| !ch.is_whitespace()).unwrap_or(s.len())
}
30
/// Removes at most `n` leading space characters from `s`.
///
/// Only ASCII spaces are stripped; a tab or any other character stops the removal.
fn dedent(s: &str, n: usize) -> String {
    let leading_spaces = s.len() - s.trim_start_matches(' ').len();
    let cut = leading_spaces.min(n);
    s[cut..].to_string()
}
36
37fn parse_blocks(lines: &[String]) -> Vec<Block> {
39 let mut blocks = Vec::new();
40 let mut i = 0;
41 while i < lines.len() {
42 let line = &lines[i];
43 if line.trim().is_empty() {
44 i += 1;
45 continue;
46 }
47 let ind = indent_of(line);
48 let content = line[ind..].to_string();
49
50 if let Some(lang) = content.strip_prefix("```") {
52 let lang = lang.trim().to_string();
53 let mut text = Vec::new();
54 i += 1;
55 while i < lines.len() && !lines[i].trim_start().starts_with("```") {
56 text.push(lines[i].clone());
57 i += 1;
58 }
59 i += 1; blocks.push(Block::Code {
61 lang: if lang.is_empty() { None } else { Some(lang) },
62 text: text.join("\n"),
63 });
64 continue;
65 }
66
67 if is_fence_open(&content) {
69 let word = content[3..].trim().to_string();
70 let inner = gather_div(lines, &mut i); if word == "columns" {
72 blocks.push(Block::Columns(Columns { cols: parse_columns(&inner), gap: None }));
73 } else if let Some(align) = align_from_word(&word) {
74 let mut sub = parse_blocks(&inner);
75 apply_align(&mut sub, align);
76 blocks.append(&mut sub);
77 } else {
78 blocks.append(&mut parse_blocks(&inner)); }
80 continue;
81 }
82
83 if let Some((level, rest)) = heading(&content) {
85 let (text, align) = split_trailing_attrs(rest);
86 blocks.push(Block::Heading { level, inlines: inline::parse_inlines(&text), align });
87 i += 1;
88 continue;
89 }
90
91 if is_hr(&content) {
93 blocks.push(Block::Divider);
94 i += 1;
95 continue;
96 }
97
98 if content.starts_with('>') {
100 let mut inner = Vec::new();
101 while i < lines.len() {
102 let t = lines[i].trim_start();
103 let Some(r) = t.strip_prefix('>') else { break };
104 inner.push(r.strip_prefix(' ').unwrap_or(r).to_string());
105 i += 1;
106 }
107 blocks.push(Block::Quote(parse_blocks(&inner)));
108 continue;
109 }
110
111 if let Some(img) = block_image(&content) {
113 blocks.push(Block::Image(img));
114 i += 1;
115 continue;
116 }
117
118 if list_marker(&content).is_some() {
120 let (list, next) = parse_list(lines, i, ind);
121 blocks.push(Block::List(list));
122 i = next;
123 continue;
124 }
125
126 if content.contains('|')
128 && i + 1 < lines.len()
129 && is_table_delim(lines[i + 1].trim())
130 {
131 let (table, next) = parse_table(lines, i);
132 blocks.push(Block::Table(table));
133 i = next;
134 continue;
135 }
136
137 let mut para = String::new();
139 while i < lines.len() {
140 let l = &lines[i];
141 if l.trim().is_empty() {
142 break;
143 }
144 let c = l[indent_of(l)..].to_string();
145 if is_block_start(&c) {
146 break;
147 }
148 let mut piece = c.trim();
149 let hard = piece.ends_with('\\');
150 if hard {
151 piece = piece[..piece.len() - 1].trim_end();
152 }
153 append_soft(&mut para, piece);
154 if hard {
155 para.push('\n');
156 }
157 i += 1;
158 }
159 let (text, align) = split_trailing_attrs(¶);
160 blocks.push(Block::Paragraph { inlines: inline::parse_inlines(&text), align });
161 }
162 blocks
163}
164
165fn is_block_start(c: &str) -> bool {
167 c.starts_with("```")
168 || is_fence_open(c)
169 || is_hr(c)
170 || c.starts_with('>')
171 || heading(c).is_some()
172 || list_marker(c).is_some()
173 || block_image(c).is_some()
174}
175
/// Reports whether `c` is a divider line: three or more identical `-`, `*` or `_` characters.
///
/// Trailing whitespace is ignored, so `"--- "` is a divider just like `"---"`. Leading
/// whitespace is not stripped here.
fn is_hr(c: &str) -> bool {
    let rule = c.trim_end().as_bytes();
    let Some(&mark) = rule.first() else {
        return false;
    };
    matches!(mark, b'-' | b'*' | b'_') && rule.len() >= 3 && rule.iter().all(|&b| b == mark)
}
181
182fn parse_list(lines: &[String], start: usize, base: usize) -> (List, usize) {
185 let (ordered, first_start, _) = list_marker(&lines[start][base..]).unwrap();
186 let kind = if ordered { ListKind::Ordered } else { ListKind::Unordered };
187 let mut items = Vec::new();
188 let mut i = start;
189 while i < lines.len() {
190 let line = &lines[i];
191 if line.trim().is_empty() {
192 if next_nonblank_indent(lines, i + 1).map(|n| n >= base).unwrap_or(false) {
194 i += 1;
195 continue;
196 }
197 break;
198 }
199 let ind = indent_of(line);
200 if ind < base {
201 break;
202 }
203 let Some((ord, _, off)) = list_marker(&line[ind..]) else {
204 break; };
206 if ind != base || ord != ordered {
207 break; }
209 let content_indent = base + off;
211 let (first_line, check) = split_task_mark(&line[ind..][off..]);
212 let mut item_lines = vec![first_line];
213 i += 1;
214 while i < lines.len() {
215 let l = &lines[i];
216 if l.trim().is_empty() {
217 if next_nonblank_indent(lines, i + 1).map(|n| n > base).unwrap_or(false) {
218 item_lines.push(String::new());
219 i += 1;
220 continue;
221 }
222 break;
223 }
224 if indent_of(l) > base {
225 item_lines.push(dedent(l, content_indent));
226 i += 1;
227 } else {
228 break;
229 }
230 }
231 items.push(ListItem { blocks: parse_blocks(&item_lines), check });
232 }
233 (List { kind, start: first_start.max(1), items }, i)
234}
235
/// Splits a leading task checkbox (`[ ]`, `[x]` or `[X]`) off the text of a list item.
///
/// The checkbox counts only when it ends the string or is followed by a whitespace character,
/// which is consumed. Returns the remaining text and `Some(checked)`; without a valid checkbox
/// the input is returned unchanged together with `None`.
fn split_task_mark(s: &str) -> (String, Option<bool>) {
    let (done, tail) = if let Some(tail) = s.strip_prefix("[ ]") {
        (false, tail)
    } else if let Some(tail) = s.strip_prefix("[x]").or_else(|| s.strip_prefix("[X]")) {
        (true, tail)
    } else {
        return (s.to_string(), None);
    };

    let mut after = tail.chars();
    match after.next() {
        None => (String::new(), Some(done)),
        Some(sep) if sep.is_whitespace() => (after.as_str().to_string(), Some(done)),
        Some(_) => (s.to_string(), None),
    }
}
250
251fn next_nonblank_indent(lines: &[String], from: usize) -> Option<usize> {
253 lines[from..].iter().find(|l| !l.trim().is_empty()).map(|l| indent_of(l))
254}
255
/// Recognizes an ATX-style heading: one to six `#` characters followed by a single space.
///
/// Returns the heading level and the trimmed heading text, or `None` when `c` is not a heading.
fn heading(c: &str) -> Option<(u8, &str)> {
    let body = c.trim_start_matches('#');
    let level = c.len() - body.len();
    if !(1..=6).contains(&level) {
        return None;
    }
    let text = body.strip_prefix(' ')?;
    Some((level as u8, text.trim()))
}
265
/// Recognizes a list marker at the very start of `c`.
///
/// Returns `(ordered, number, offset)`, where `offset` is the byte length of the marker plus
/// the one space or tab that must follow it:
/// - bullets `-`, `*`, `+` give `(false, 0, 2)`;
/// - digits followed by `.` or `)` give `(true, n, digits + 2)`, with `n` falling back to 1
///   when the digits do not fit in a `u32`.
fn list_marker(c: &str) -> Option<(bool, u32, usize)> {
    let bytes = c.as_bytes();
    let gap_at = |idx: usize| matches!(bytes.get(idx), Some(b' ' | b'\t'));

    match *bytes.first()? {
        b'-' | b'*' | b'+' => gap_at(1).then_some((false, 0, 2)),
        b'0'..=b'9' => {
            let digits = bytes.iter().take_while(|b| b.is_ascii_digit()).count();
            let closed = matches!(bytes.get(digits), Some(b'.' | b')'));
            (closed && gap_at(digits + 1))
                .then(|| (true, c[..digits].parse::<u32>().unwrap_or(1), digits + 2))
        }
        _ => None,
    }
}
284
285fn block_image(c: &str) -> Option<BlockImage> {
287 let c = c.trim();
288 let rest = c.strip_prefix("?;
290 if !c.ends_with(')') {
291 return None;
292 }
293 let alt = &rest[..close_alt];
294 let src = &rest[close_alt + 2..rest.len() - 1];
295 if src.is_empty() {
296 return None;
297 }
298 Some(BlockImage {
299 src: image_source(src),
300 width: None,
301 align: Align::Left,
302 caption: if alt.trim().is_empty() { None } else { Some(inline::parse_inlines(alt.trim())) },
303 })
304}
305
306pub(crate) fn image_source(src: &str) -> ImageSource {
308 match src.strip_prefix('@') {
309 Some(name) => ImageSource::Named(name.to_string()),
310 None => ImageSource::Path(src.into()),
311 }
312}
313
314fn align_from_word(w: &str) -> Option<Align> {
316 match w {
317 "center" | "centre" => Some(Align::Center),
318 "right" => Some(Align::Right),
319 "left" => Some(Align::Left),
320 "justify" => Some(Align::Justify),
321 _ => None,
322 }
323}
324
/// Reports whether `c` opens a `:::` container, i.e. starts with `:::` followed by at least
/// one non-whitespace character. A bare `:::` is not an opener.
fn is_fence_open(c: &str) -> bool {
    match c.strip_prefix(":::") {
        Some(word) => !word.trim().is_empty(),
        None => false,
    }
}
329
330fn gather_div(lines: &[String], i: &mut usize) -> Vec<String> {
332 *i += 1;
333 let mut inner = Vec::new();
334 let mut depth = 1usize;
335 while *i < lines.len() {
336 let t = lines[*i].trim();
337 if t == ":::" {
338 depth -= 1;
339 if depth == 0 {
340 *i += 1;
341 break; }
343 } else if is_fence_open(t) {
344 depth += 1;
345 }
346 inner.push(lines[*i].clone());
347 *i += 1;
348 }
349 inner
350}
351
352fn parse_columns(inner: &[String]) -> Vec<Column> {
354 let mut cols = Vec::new();
355 let mut i = 0;
356 while i < inner.len() {
357 let mut parts = inner[i].trim().strip_prefix(":::").unwrap_or("").split_whitespace();
358 if parts.next() == Some("col") {
359 let weight =
360 parts.next().and_then(|s| s.parse::<f32>().ok()).filter(|w| *w > 0.0).unwrap_or(1.0);
361 let col_lines = gather_div(inner, &mut i);
362 cols.push(Column { blocks: parse_blocks(&col_lines), weight });
363 } else {
364 i += 1;
365 }
366 }
367 cols
368}
369
370fn is_table_delim(t: &str) -> bool {
372 let cells = split_row(t);
373 !cells.is_empty()
374 && cells
375 .iter()
376 .all(|c| !c.is_empty() && c.contains('-') && c.bytes().all(|b| b == b'-' || b == b':'))
377}
378
/// Splits one table row into its trimmed cell texts.
///
/// One leading and one trailing `|` are dropped first. A `|` separates cells unless it sits
/// inside a backtick code span or directly follows a backslash outside of code; the backslash
/// and backticks themselves are kept in the cell text. The result always has at least one cell.
fn split_row(line: &str) -> Vec<String> {
    let row = line.trim();
    let row = row.strip_prefix('|').unwrap_or(row);
    let row = row.strip_suffix('|').unwrap_or(row);

    let mut cells = Vec::new();
    let mut cell = String::new();
    let mut in_code = false;
    // Set right after a backslash outside of code: the next character is copied verbatim.
    let mut escaped = false;

    for ch in row.chars() {
        if escaped {
            cell.push(ch);
            escaped = false;
            continue;
        }
        match ch {
            '`' => {
                in_code = !in_code;
                cell.push(ch);
            }
            '\\' if !in_code => {
                cell.push(ch);
                escaped = true;
            }
            '|' if !in_code => {
                cells.push(cell.trim().to_string());
                cell.clear();
            }
            other => cell.push(other),
        }
    }
    cells.push(cell.trim().to_string());
    cells
}
412
413fn parse_align_row(line: &str) -> Vec<Align> {
415 split_row(line)
416 .iter()
417 .map(|c| match (c.starts_with(':'), c.ends_with(':')) {
418 (true, true) => Align::Center,
419 (false, true) => Align::Right,
420 _ => Align::Left,
421 })
422 .collect()
423}
424
425fn parse_table(lines: &[String], start: usize) -> (Table, usize) {
427 let to_cells = |t: &str| -> Vec<Cell> {
428 split_row(t).iter().map(|s| Cell { inlines: inline::parse_inlines(s), bg: None }).collect()
429 };
430 let header = Some(to_cells(lines[start].trim()));
431 let cols: Vec<ColSpec> = parse_align_row(lines[start + 1].trim())
432 .into_iter()
433 .map(|a| ColSpec { align: a, width: None })
434 .collect();
435 let mut rows = Vec::new();
436 let mut i = start + 2;
437 while i < lines.len() {
438 let t = lines[i].trim();
439 if t.is_empty() || !t.contains('|') {
440 break;
441 }
442 rows.push(to_cells(t));
443 i += 1;
444 }
445 (Table { header, rows, cols, style: TableStyle::default() }, i)
446}
447
448fn apply_align(blocks: &mut [Block], align: Align) {
450 for b in blocks {
451 match b {
452 Block::Heading { align: a, .. } | Block::Paragraph { align: a, .. } => *a = align,
453 Block::Quote(inner) => apply_align(inner, align),
454 Block::List(list) => {
455 for it in &mut list.items {
456 apply_align(&mut it.blocks, align);
457 }
458 }
459 _ => {}
460 }
461 }
462}
463
464fn split_trailing_attrs(s: &str) -> (String, Align) {
466 let t = s.trim_end();
467 if t.ends_with('}') {
468 if let Some(open) = t.rfind('{') {
469 let before = &t[..open];
470 if before.ends_with(' ') || before.is_empty() {
471 let inside = &t[open + 1..t.len() - 1];
472 let align = parse_attrs(inside)
473 .iter()
474 .find_map(|a| match a {
475 Attr::Kv(k, v) if k == "align" => align_from_word(v),
476 Attr::Flag(f) => align_from_word(f),
477 _ => None,
478 })
479 .unwrap_or(Align::Left);
480 return (before.trim_end().to_string(), align);
481 }
482 }
483 }
484 (t.to_string(), Align::Left)
485}
486
487fn append_soft(buf: &mut String, next: &str) {
489 if next.is_empty() {
490 return;
491 }
492 if let (Some(a), Some(b)) = (buf.chars().last(), next.chars().next()) {
493 if a != '\n' && needs_space(a, b) {
495 buf.push(' ');
496 }
497 }
498 buf.push_str(next);
499}
500
/// Decides whether a space belongs between `a` and `b` when two lines are joined.
///
/// No space is wanted when either character lies in U+2E80..=U+9FFF or U+FF00..=U+FFEF.
fn needs_space(a: char, b: char) -> bool {
    let is_cjk = |c: char| {
        ('\u{2E80}'..='\u{9FFF}').contains(&c) || ('\u{FF00}'..='\u{FFEF}').contains(&c)
    };
    !(is_cjk(a) || is_cjk(b))
}
508
509
510