1use crate::config::{Config, Flavor};
2use crate::syntax::{AstNode, SyntaxKind, SyntaxNode};
3use panache_parser::parser::blocks::code_blocks::{CodeBlockType, InfoString};
4use panache_parser::parser::utils::hashpipe_normalizer::normalize_hashpipe_header;
5use rowan::NodeOrToken;
6use std::collections::HashMap;
7
8use super::hashpipe;
9
/// Externally formatted code, keyed by `(language, code block content)`.
///
/// `format_code_block` looks a block up first by its extracted content and
/// then by its raw `CODE_CONTENT` text; the value is the formatted replacement.
pub type FormattedCodeMap = HashMap<(String, String), String>;
11
/// A code block gathered by `collect_code_blocks` for an external formatter.
#[derive(Clone, Debug)]
pub struct ExternalCodeBlock {
    /// Language of the block (taken from the CST language token or, failing
    /// that, from the parsed info string).
    pub language: String,
    /// Block content exactly as extracted from the syntax tree.
    pub original: String,
    /// Text handed to the formatter: `original`, minus the hashpipe header
    /// when one was split off.
    pub formatter_input: String,
    /// The hashpipe header removed from `original`, if any.
    pub hashpipe_prefix: Option<String>,
}
19
20pub(super) fn format_code_block(
22 node: &SyntaxNode,
23 config: &Config,
24 formatted_code: &FormattedCodeMap,
25 output: &mut String,
26) {
27 if is_unclosed_fenced_code_block(node) {
28 output.push_str(&node.text().to_string());
29 return;
30 }
31
32 let (info_node, language, extracted_content) = extract_code_block_parts(node);
33 let mut content = extracted_content;
34 let language_key = language.unwrap_or_default();
35
36 if let Some(formatted) = formatted_code.get(&(language_key.clone(), content.clone())) {
37 content = expand_tabs_with_width(formatted, config.tab_width);
38 } else if let Some(raw_content) = extract_raw_code_block_content(node)
39 && let Some(formatted) = formatted_code.get(&(language_key, raw_content))
40 {
41 content = expand_tabs_with_width(formatted, config.tab_width);
42 }
43
44 let info_node = match info_node {
45 Some(node) => node,
46 None => {
47 let mut final_content = content;
49 if !matches!(config.tab_stops, crate::config::TabStopMode::Preserve) {
50 final_content = expand_tabs_with_width(&final_content, config.tab_width);
51 }
52 let fence_char = '`';
53 let fence_length = determine_fence_length(&final_content, fence_char);
54 output.push_str(&fence_char.to_string().repeat(fence_length));
55 output.push('\n');
56 output.push_str(&final_content);
57 output.push_str(&fence_char.to_string().repeat(fence_length));
58 output.push('\n');
59 return;
60 }
61 };
62
63 let info_string_raw = info_node.text().to_string();
65 let info = InfoString::parse(&info_string_raw);
66
67 let mut final_content = content;
69 if !matches!(config.tab_stops, crate::config::TabStopMode::Preserve) {
70 final_content = expand_tabs_with_width(&final_content, config.tab_width);
71 }
72
73 let fence_char = '`';
75
76 let fence_length = determine_fence_length(&final_content, fence_char);
78
79 let use_hashpipe = matches!(config.flavor, Flavor::Quarto | Flavor::RMarkdown)
81 && matches!(&info.block_type, CodeBlockType::Executable { .. });
82
83 if use_hashpipe {
84 if format_code_block_hashpipe(
87 node,
88 &info_node,
89 &final_content,
90 fence_char,
91 fence_length,
92 config,
93 output,
94 ) {
95 return; }
97 }
99
100 let formatted_info = format_info_string(&info_node, &info);
102
103 log::trace!("formatted_info = '{}'", formatted_info);
104
105 for _ in 0..fence_length {
107 output.push(fence_char);
108 }
109 if !formatted_info.is_empty() {
110 output.push_str(&formatted_info);
111 }
112 output.push('\n');
113 output.push_str(&final_content);
114 for _ in 0..fence_length {
115 output.push(fence_char);
116 }
117 output.push('\n');
118}
119
120fn is_unclosed_fenced_code_block(node: &SyntaxNode) -> bool {
121 let has_open = node
122 .children()
123 .any(|child| child.kind() == SyntaxKind::CODE_FENCE_OPEN);
124 let has_close = node
125 .children()
126 .any(|child| child.kind() == SyntaxKind::CODE_FENCE_CLOSE);
127
128 has_open && !has_close
129}
130
131fn extract_raw_code_block_content(node: &SyntaxNode) -> Option<String> {
132 node.children()
133 .find(|child| child.kind() == SyntaxKind::CODE_CONTENT)
134 .map(|child| child.text().to_string())
135}
136
/// Replaces every tab in `text` with spaces up to the next tab stop.
///
/// Tab stops sit at multiples of `tab_width`; the column counter advances by
/// one per character and restarts after each `'\n'`.
///
/// A `tab_width` of zero defines no tab stops, so the text is returned
/// unchanged (previously this panicked with a remainder-by-zero).
fn expand_tabs_with_width(text: &str, tab_width: usize) -> String {
    if tab_width == 0 {
        return text.to_string();
    }

    let mut out = String::with_capacity(text.len());
    let mut col = 0usize;
    for ch in text.chars() {
        match ch {
            '\t' => {
                // Distance from the current column to the next tab stop.
                let spaces = tab_width - (col % tab_width);
                out.extend(std::iter::repeat(' ').take(spaces));
                col += spaces;
            }
            '\n' => {
                out.push('\n');
                col = 0;
            }
            _ => {
                out.push(ch);
                col += 1;
            }
        }
    }
    out
}
159
/// Removes up to `columns` columns of leading spaces/tabs from `indent` and
/// returns what is left.
///
/// Column widths follow the same rule as `indent_columns`: a space is one
/// column and a tab advances to the next multiple of four. (Counting every
/// tab as a flat four columns under-stripped mixed indents such as
/// `"  \t  "` when more than four columns had to be removed.)
///
/// A tab that is wider than the columns still to be removed is consumed
/// whole. Stripping stops at the first character that is neither a space nor
/// a tab.
fn strip_indent_columns(indent: &str, columns: usize) -> String {
    let mut remaining = columns;
    let mut col = 0usize;
    let mut idx = 0;
    for (i, ch) in indent.char_indices() {
        if remaining == 0 {
            break;
        }
        let width = match ch {
            ' ' => 1,
            '\t' => 4 - (col % 4),
            _ => break,
        };
        remaining = remaining.saturating_sub(width);
        col += width;
        // Both ' ' and '\t' are one byte long in UTF-8.
        idx = i + 1;
    }
    indent[idx..].to_string()
}
181
/// Measures the leading whitespace of `indent` in columns.
///
/// A space counts as one column, a tab advances to the next multiple of four;
/// counting stops at the first character that is neither.
fn indent_columns(indent: &str) -> usize {
    indent
        .chars()
        .take_while(|ch| matches!(ch, ' ' | '\t'))
        .fold(0usize, |cols, ch| {
            if ch == '\t' {
                cols + (4 - cols % 4)
            } else {
                cols + 1
            }
        })
}
193
/// Splits a code block node into its info node, language and content.
///
/// Returns `(info_node, language, content)`:
/// * `info_node` — the `CODE_INFO` node inside `CODE_FENCE_OPEN`, if any;
/// * `language` — text of the `CODE_LANGUAGE` token inside that info node;
/// * `content` — the text of every `CODE_CONTENT` child, rebuilt line by line
///   with block-quote markers dropped and the block's base indentation
///   stripped from each line.
fn extract_code_block_parts(node: &SyntaxNode) -> (Option<SyntaxNode>, Option<String>, String) {
    let mut info_node: Option<SyntaxNode> = None;
    let mut language: Option<String> = None;
    let mut content = String::new();
    let mut has_fence = false;
    let mut fence_indent = String::new();
    let mut fence_indent_cols = 0usize;

    for child in node.children_with_tokens() {
        match child {
            NodeOrToken::Token(t) => {
                // Whitespace seen before the opening fence is remembered as the
                // fence's indentation (the last such token wins).
                if t.kind() == SyntaxKind::WHITESPACE && !has_fence {
                    fence_indent = t.text().to_string();
                }
            }
            NodeOrToken::Node(n) => match n.kind() {
                SyntaxKind::CODE_FENCE_OPEN => {
                    has_fence = true;
                    fence_indent_cols = indent_columns(&fence_indent);
                    for child_token in n.children_with_tokens() {
                        if let NodeOrToken::Node(node) = child_token
                            && node.kind() == SyntaxKind::CODE_INFO
                        {
                            for info_token in node.children_with_tokens() {
                                if let NodeOrToken::Token(t) = info_token
                                    && t.kind() == SyntaxKind::CODE_LANGUAGE
                                {
                                    language = Some(t.text().to_string());
                                }
                            }
                            info_node = Some(node);
                        }
                    }
                }
                SyntaxKind::CODE_CONTENT => {
                    // Fenced blocks lose the fence's indentation; blocks without
                    // a fence lose four columns (presumably indented code
                    // blocks — confirm against the parser).
                    let base_indent_cols = if has_fence { fence_indent_cols } else { 4 };
                    let mut line_content = String::new();
                    let mut line_indent = String::new();
                    let mut at_line_start = true;
                    let mut saw_blockquote_marker = false;

                    for token in n.children_with_tokens() {
                        match token {
                            NodeOrToken::Token(t) => match t.kind() {
                                // The marker itself is never copied into the content.
                                SyntaxKind::BLOCK_QUOTE_MARKER if at_line_start => {
                                    saw_blockquote_marker = true;
                                }
                                SyntaxKind::WHITESPACE if at_line_start => {
                                    if saw_blockquote_marker {
                                        // Drop one space directly after a block-quote
                                        // marker; the rest counts as indentation.
                                        let ws = t.text();
                                        if let Some(stripped) = ws.strip_prefix(' ') {
                                            line_indent.push_str(stripped);
                                        } else {
                                            line_indent.push_str(ws);
                                        }
                                        saw_blockquote_marker = false;
                                    } else {
                                        line_indent.push_str(t.text());
                                    }
                                }
                                SyntaxKind::TEXT => {
                                    saw_blockquote_marker = false;
                                    if at_line_start && t.text().is_empty() {
                                        continue;
                                    }
                                    if at_line_start {
                                        // First text on the line: emit whatever
                                        // indentation survives the base-indent strip.
                                        line_content.push_str(&strip_indent_columns(
                                            &line_indent,
                                            base_indent_cols,
                                        ));
                                        line_indent.clear();
                                        at_line_start = false;
                                    }
                                    line_content.push_str(t.text());
                                }
                                SyntaxKind::NEWLINE => {
                                    saw_blockquote_marker = false;
                                    // Lines that never left the line-start state
                                    // (whitespace only) contribute just the newline.
                                    if !at_line_start {
                                        content.push_str(&line_content);
                                    }
                                    content.push('\n');
                                    line_content.clear();
                                    line_indent.clear();
                                    at_line_start = true;
                                }
                                _ => {}
                            },
                            NodeOrToken::Node(inner_node) => {
                                // Nested nodes are copied by their text, with the same
                                // line-start indentation handling as TEXT tokens.
                                let node_text = inner_node.text().to_string();
                                if node_text.is_empty() {
                                    continue;
                                }
                                if at_line_start {
                                    line_content.push_str(&strip_indent_columns(
                                        &line_indent,
                                        base_indent_cols,
                                    ));
                                    line_indent.clear();
                                    at_line_start = false;
                                }
                                line_content.push_str(&node_text);
                            }
                        }
                    }

                    // Flush a final line that has no terminating newline.
                    if !at_line_start {
                        content.push_str(&line_content);
                    }
                }
                _ => {}
            },
        }
    }

    (info_node, language, content)
}
313
314fn split_hashpipe_header(content: &str, prefix: &str) -> Option<(String, String)> {
315 let normalized = normalize_hashpipe_header(content, prefix)?;
316 let header_end = normalized.header_byte_span.end;
317 Some((
318 content[..header_end].to_string(),
319 content[header_end..].to_string(),
320 ))
321}
322
/// Returns the fence length needed to enclose `content`: one more than the
/// longest run of consecutive `fence_char`s in it, and never less than three.
fn determine_fence_length(content: &str, fence_char: char) -> usize {
    // Splitting on every other character leaves exactly the runs of
    // `fence_char` (plus empty pieces, which count as zero).
    let longest_run = content
        .split(|ch: char| ch != fence_char)
        .map(|run| run.chars().count())
        .max()
        .unwrap_or(0);

    (longest_run + 1).max(3)
}
344
345fn extract_chunk_options_from_cst(
348 info_node: &SyntaxNode,
349) -> Vec<(Option<String>, Option<String>, bool)> {
350 use crate::syntax::{ChunkInfoItem, CodeInfo};
351
352 let Some(info) = CodeInfo::cast(info_node.clone()) else {
353 return Vec::new();
354 };
355
356 let mut options = Vec::new();
357 let mut pending_label_parts = Vec::new();
358 for item in info.chunk_items() {
359 match item {
360 ChunkInfoItem::Label(label) => {
361 let value = label.text();
362 if !value.is_empty() {
363 pending_label_parts.push(value);
364 }
365 }
366 ChunkInfoItem::Option(option) => {
367 if !pending_label_parts.is_empty() {
368 options.push((None, Some(pending_label_parts.join(" ")), false));
369 pending_label_parts.clear();
370 }
371 if let (Some(key), Some(value)) = (option.key(), option.value()) {
372 options.push((Some(key), Some(value), option.is_quoted()));
373 }
374 }
375 }
376 }
377
378 if !pending_label_parts.is_empty() {
379 options.push((None, Some(pending_label_parts.join(" ")), false));
380 }
381
382 options
383}
384
/// Renders chunk options as a comma-separated inline list.
///
/// * `(None, Some(v), _)` is written as `v` (a label).
/// * `(Some(k), Some(v), false)` is written as `k=v`.
/// * `(Some(k), Some(v), true)` is written as `k="v"`, or `k='v'` when `v`
///   contains a double quote but no single quote.
/// * Entries without a value are skipped.
fn format_chunk_options_inline(options: &[(Option<String>, Option<String>, bool)]) -> String {
    options
        .iter()
        .filter_map(|(key, value, is_quoted)| {
            let value = value.as_ref()?;
            let rendered = match key {
                None => value.clone(),
                Some(key) if *is_quoted => {
                    let prefer_single = value.contains('"') && !value.contains('\'');
                    let quote = if prefer_single { '\'' } else { '"' };
                    format!("{}={}{}{}", key, quote, value, quote)
                }
                Some(key) => format!("{}={}", key, value),
            };
            Some(rendered)
        })
        .collect::<Vec<_>>()
        .join(", ")
}
417
418fn format_info_string(info_node: &SyntaxNode, info: &InfoString) -> String {
420 log::trace!(
421 "format_info_string: block_type={:?}, raw='{}'",
422 info.block_type,
423 info.raw
424 );
425 match &info.block_type {
426 CodeBlockType::Plain => {
427 if info.attributes.is_empty() {
429 String::new()
430 } else {
431 format!("{{{}}}", format_attributes(&info.attributes, false))
432 }
433 }
434 CodeBlockType::DisplayShortcut { language } => {
435 if info.attributes.is_empty() {
437 language.clone()
438 } else {
439 format!(
440 "{} {{{}}}",
441 language,
442 format_attributes(&info.attributes, false)
443 )
444 }
445 }
446 CodeBlockType::DisplayExplicit { classes } => {
447 if let Some(first_class) = classes.first() {
450 if info.attributes.is_empty() && classes.len() == 1 {
451 first_class.clone()
452 } else {
453 let mut attrs: Vec<String> =
455 classes.iter().skip(1).map(|c| format!(".{}", c)).collect();
456 attrs.extend(info.attributes.iter().map(|(k, v)| {
457 if let Some(val) = v {
458 format!("{}=\"{}\"", k, val)
459 } else {
460 k.clone()
461 }
462 }));
463 if attrs.is_empty() {
464 first_class.clone()
465 } else {
466 format!("{} {{{}}}", first_class, attrs.join(" "))
467 }
468 }
469 } else {
470 if info.attributes.is_empty() {
472 String::new()
473 } else {
474 format!("{{{}}}", format_attributes(&info.attributes, false))
475 }
476 }
477 }
478 CodeBlockType::Executable { language } => {
479 let options = extract_chunk_options_from_cst(info_node);
482 if options.is_empty() {
483 format!("{{{}}}", language)
484 } else {
485 format!(
486 "{{{}, {}}}",
487 language,
488 format_chunk_options_inline(&options)
489 )
490 }
491 }
492 CodeBlockType::Raw { format } => {
493 format!("{{={}}}", format)
496 }
497 }
498}
499
500fn format_code_block_hashpipe(
506 _code_block_node: &SyntaxNode,
507 info_node: &SyntaxNode,
508 content: &str,
509 fence_char: char,
510 fence_length: usize,
511 config: &Config,
512 output: &mut String,
513) -> bool {
514 let info = InfoString::parse(&info_node.text().to_string());
515 let language = match &info.block_type {
516 CodeBlockType::Executable { language } => language,
517 _ => unreachable!("hashpipe only for executable chunks"),
518 };
519
520 let Some(comment_prefix) = hashpipe::get_comment_prefix(language) else {
523 return false; };
525 let ((simple, complex), had_content_hashpipe) =
526 hashpipe::split_options_from_cst_with_content(info_node, content, comment_prefix);
527
528 let hashpipe_lines = match hashpipe::format_as_hashpipe(
530 language,
531 &simple,
532 config.line_width,
533 config.wrap.as_ref(),
534 ) {
535 Some(lines) => lines,
536 None => return false, };
538
539 for _ in 0..fence_length {
541 output.push(fence_char);
542 }
543 output.push('{');
544 output.push_str(language);
545 if !complex.is_empty() {
546 output.push_str(", ");
547 output.push_str(&format_chunk_options_inline(&complex));
548 }
549 output.push('}');
550 output.push('\n');
551
552 for line in &hashpipe_lines {
554 output.push_str(line);
555 output.push('\n');
556 }
557
558 let body = if had_content_hashpipe {
560 if let Some(prefix) = hashpipe::get_comment_prefix(language) {
561 if let Some((_header, body)) = split_hashpipe_header(content, prefix) {
562 body
563 } else {
564 content.to_string()
565 }
566 } else {
567 content.to_string()
568 }
569 } else {
570 content.to_string()
571 };
572
573 if !hashpipe_lines.is_empty() {
574 let body_without_leading_blanks = strip_leading_blank_lines(&body);
575 let (body_without_marker_separators, had_marker_separator) =
576 strip_leading_hashpipe_blank_markers(body_without_leading_blanks, comment_prefix);
577 if !body_without_marker_separators.trim().is_empty()
578 && (had_marker_separator || !body_without_marker_separators.starts_with(comment_prefix))
579 {
580 output.push('\n');
581 }
582 output.push_str(body_without_marker_separators);
583 } else {
584 output.push_str(&body);
585 }
586
587 for _ in 0..fence_length {
589 output.push(fence_char);
590 }
591 output.push('\n');
592
593 true }
595
/// Returns `content` without its leading whitespace-only lines.
///
/// A final line without a newline is dropped too when it is whitespace-only;
/// the first line with any other character is kept untouched, including its
/// indentation.
fn strip_leading_blank_lines(content: &str) -> &str {
    let mut rest = content;
    loop {
        match rest.find('\n') {
            // Blank line: skip it together with its newline.
            Some(newline) if rest[..newline].trim().is_empty() => {
                rest = &rest[newline + 1..];
            }
            Some(_) => return rest,
            None => return if rest.trim().is_empty() { "" } else { rest },
        }
    }
}
620
/// Removes leading lines that consist of nothing but `prefix`.
///
/// A line matches when, after trimming leading spaces/tabs and a trailing
/// `'\r'`, it equals `prefix` exactly. Returns the remaining text and whether
/// at least one such line was removed.
fn strip_leading_hashpipe_blank_markers<'a>(content: &'a str, prefix: &str) -> (&'a str, bool) {
    let is_bare_marker = |line: &str| {
        line.trim_start_matches([' ', '\t']).trim_end_matches('\r') == prefix
    };

    let mut rest = content;
    let mut consumed = false;

    while !rest.is_empty() {
        // A last line without a newline is followed by nothing.
        let (line, tail) = rest.split_once('\n').unwrap_or((rest, ""));
        if !is_bare_marker(line) {
            break;
        }
        consumed = true;
        rest = tail;
    }

    (rest, consumed)
}
648
/// Renders key/value attributes as a single string.
///
/// * Attributes without a value are written as the bare key.
/// * With `preserve_unquoted == false`, every value is written as
///   `key="value"` and entries are joined with a space.
/// * With `preserve_unquoted == true`, a value is quoted only when it contains
///   a space or comma and none of `(`, `[`, `{`; everything else is written
///   as `key=value`. Entries are joined with `", "`.
///
/// Quoted values have backslashes and double quotes escaped.
fn format_attributes(attrs: &[(String, Option<String>)], preserve_unquoted: bool) -> String {
    /// `key="value"` with `\` and `"` escaped inside the value.
    fn quoted(key: &str, value: &str) -> String {
        let escaped = value.replace('\\', "\\\\").replace('"', "\\\"");
        format!("{}=\"{}\"", key, escaped)
    }

    let render = |(key, value): &(String, Option<String>)| -> String {
        match value {
            None => key.clone(),
            Some(value) if !preserve_unquoted => quoted(key, value),
            Some(value) => {
                let has_separator = value.contains([' ', ',']);
                let has_bracket = value.contains(['(', '[', '{']);
                if has_separator && !has_bracket {
                    quoted(key, value)
                } else {
                    format!("{}={}", key, value)
                }
            }
        }
    };

    let separator = if preserve_unquoted { ", " } else { " " };
    attrs.iter().map(render).collect::<Vec<_>>().join(separator)
}
694
695pub fn collect_code_blocks(
699 tree: &SyntaxNode,
700 _input: &str,
701 config: &Config,
702) -> Vec<ExternalCodeBlock> {
703 let mut result = Vec::new();
704 for node in tree.descendants() {
705 if node.kind() != SyntaxKind::CODE_BLOCK {
706 continue;
707 }
708
709 let (info_node, language, content) = extract_code_block_parts(&node);
710 if content.is_empty() {
711 continue;
712 }
713
714 let info = info_node
715 .as_ref()
716 .map(|n| InfoString::parse(&n.text().to_string()))
717 .unwrap_or_else(|| InfoString::parse(""));
718
719 let language = language.unwrap_or_else(|| match info.block_type {
720 CodeBlockType::DisplayShortcut { language }
721 | CodeBlockType::Executable { language } => language,
722 CodeBlockType::DisplayExplicit { classes } => {
723 classes.first().cloned().unwrap_or_default()
724 }
725 _ => String::new(),
726 });
727
728 if language.is_empty() && !config.formatters.contains_key("") {
729 continue;
730 }
731
732 result.push(ExternalCodeBlock {
733 language,
734 original: content.clone(),
735 formatter_input: content,
736 hashpipe_prefix: None,
737 });
738 }
739
740 if !matches!(config.flavor, Flavor::Quarto | Flavor::RMarkdown) {
741 return result;
742 }
743
744 let mut updated = Vec::with_capacity(result.len());
745 for block in result {
746 let mut formatter_input = block.formatter_input.clone();
747 let mut prefix = None;
748
749 for node in tree.descendants() {
750 if node.kind() != SyntaxKind::CODE_BLOCK {
751 continue;
752 }
753
754 let (info_node, language, content) = extract_code_block_parts(&node);
755 if content != block.original {
756 continue;
757 }
758
759 let info_node = match info_node {
760 Some(node) => node,
761 None => break,
762 };
763
764 let info_raw = info_node.text().to_string();
765 let info = InfoString::parse(&info_raw);
766 let is_executable = matches!(info.block_type, CodeBlockType::Executable { .. });
767 if !is_executable {
768 break;
769 }
770
771 let language = language.unwrap_or_else(|| match info.block_type {
772 CodeBlockType::Executable { language } => language,
773 _ => String::new(),
774 });
775
776 if let Some(prefix_str) = hashpipe::get_comment_prefix(&language)
777 && let Some((header, body)) = split_hashpipe_header(&content, prefix_str)
778 {
779 formatter_input = body;
780 prefix = Some(header);
781 }
782 break;
783 }
784
785 updated.push(ExternalCodeBlock {
786 language: block.language,
787 original: block.original,
788 formatter_input,
789 hashpipe_prefix: prefix,
790 });
791 }
792
793 updated
794}
795
#[cfg(test)]
mod tests {
    use super::split_hashpipe_header;

    /// A key with an empty value followed by list items is all header: the
    /// split must keep the whole input as header and leave an empty body.
    #[test]
    fn split_hashpipe_header_handles_empty_value_with_indented_list() {
        let content = "#| fig-cap:\n#| - A\n#| - B\n";

        let (header, body) =
            split_hashpipe_header(content, "#|").expect("expected hashpipe header split");

        assert_eq!(header, content);
        assert_eq!(body, "");
    }
}