use anyhow::{bail, Result};
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;
use std::path::PathBuf;
use crate::executor::{execute_code, Language};
use crate::markers::{
get_indent, is_code_backticks_end, is_code_backticks_start, is_code_comment_bash_start,
is_code_comment_end, is_code_comment_python_start, is_output_end, is_output_start, is_skip,
remove_md_comment, WARNING,
};
/// Matches a single `key=value` option: a run of word characters, an `=`, then
/// a run of non-whitespace characters. `BacktickOptions::from_line` scans the
/// opening fence line with it to collect options.
static OPTION_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?P<key>\w+)=(?P<value>\S+)").unwrap());
/// The part of the markdown document the processor is currently inside.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Section {
/// Outside any recognised block; lines are checked for block start markers.
Normal,
/// Between an output-start and an output-end marker; existing lines are
/// collected instead of being copied to the result.
Output,
/// Inside a block opened by a line that `is_code_comment_python_start` matched.
CodeCommentPython,
/// Inside a block opened by a line that `is_code_comment_bash_start` matched.
CodeCommentBash,
/// Inside a fenced block opened by a line that `is_code_backticks_start` matched.
CodeBackticks,
}
/// Options parsed from the opening line of a fenced code block.
#[derive(Debug, Clone, Default)]
pub struct BacktickOptions {
/// Word that directly follows the three backticks; empty when none was found.
pub language: String,
/// Value of the `filename=` option, if one was given.
pub filename: Option<PathBuf>,
/// Every other `key=value` option found on the line, keyed by `key`.
pub other: HashMap<String, String>,
}
impl BacktickOptions {
pub fn from_line(line: &str) -> Self {
let mut options = BacktickOptions::default();
static LANG_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"```(?P<language>\w+)").unwrap());
if let Some(caps) = LANG_PATTERN.captures(line) {
options.language = caps["language"].to_string();
}
if line.contains("markdown-code-runner") {
for caps in OPTION_PATTERN.captures_iter(line) {
let key = caps["key"].to_string();
let value = caps["value"].to_string();
if key == "filename" {
options.filename = Some(PathBuf::from(value));
} else {
options.other.insert(key, value);
}
}
}
options
}
}
/// Mutable state carried from line to line while a markdown document is processed.
pub struct ProcessingState {
/// Section the processor is currently inside.
pub section: Section,
/// Code lines collected for the block currently being read; cleared when the block ends.
pub code: Vec<String>,
/// Lines of the existing output section; copied back to `new_lines` when the block is skipped.
pub original_output: Vec<String>,
/// Set when a skip marker is seen; reset at the next output-end marker.
pub skip_code_block: bool,
/// Lines returned by the most recent execution; `None` when a block starts
/// and again after an output-end marker.
pub output: Option<Vec<String>>,
/// Lines of the resulting document, in order.
pub new_lines: Vec<String>,
/// Options parsed from the opening line of the current fenced block.
pub backtick_options: BacktickOptions,
/// When true, the ` markdown-code-runner…` suffix is removed from opening fence lines.
pub backtick_standardize: bool,
/// Indentation taken from the line that opened the current block; removed
/// from fenced code lines before they are stored in `code`.
pub indent: String,
/// Forwarded to `execute_code` on every execution.
pub verbose: bool,
/// Code of every Python block run so far without an output file; all of
/// them are passed to `execute_code` again together with each new block.
python_blocks: Vec<Vec<String>>,
}
impl ProcessingState {
/// Creates a processor that starts in [`Section::Normal`] with every buffer
/// empty, no pending output and no block being skipped.
///
/// `backtick_standardize` and `verbose` are stored as given.
pub fn new(backtick_standardize: bool, verbose: bool) -> Self {
    let backtick_options = BacktickOptions::default();
    Self {
        // Caller-supplied switches.
        backtick_standardize,
        verbose,
        // Parsing position.
        section: Section::Normal,
        skip_code_block: false,
        indent: String::default(),
        backtick_options,
        // Buffers.
        code: vec![],
        original_output: vec![],
        new_lines: vec![],
        python_blocks: vec![],
        output: None,
    }
}
pub fn process_line(&mut self, line: &str) -> Result<()> {
if is_skip(line) {
self.skip_code_block = true;
self.new_lines.push(line.to_string());
} else if is_output_start(line).is_some() {
self.process_output_start(line);
} else if is_output_end(line) {
self.process_output_end(line);
} else {
match self.section {
Section::CodeCommentPython | Section::CodeCommentBash => {
self.process_comment_code(line)?;
}
Section::CodeBackticks => {
self.process_backtick_code(line)?;
}
Section::Output => {
self.original_output.push(line.to_string());
}
Section::Normal => {
let processed_line = self.process_start_markers(line);
self.new_lines
.push(processed_line.unwrap_or_else(|| line.to_string()));
return Ok(());
}
}
if self.section != Section::Output {
self.new_lines.push(line.to_string());
}
}
Ok(())
}
fn process_start_markers(&mut self, line: &str) -> Option<String> {
if is_code_comment_python_start(line).is_some() {
self.output = None;
self.section = Section::CodeCommentPython;
self.indent = get_indent(line);
return Some(line.to_string());
}
if is_code_comment_bash_start(line).is_some() {
self.output = None;
self.section = Section::CodeCommentBash;
self.indent = get_indent(line);
return Some(line.to_string());
}
if let Some(caps) = is_code_backticks_start(line) {
self.output = None;
self.backtick_options = BacktickOptions::from_line(line);
self.section = Section::CodeBackticks;
self.indent = caps.name("spaces").map_or("", |m| m.as_str()).to_string();
if self.backtick_standardize && line.contains("markdown-code-runner") {
static STRIP_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\s+markdown-code-runner.*").unwrap());
return Some(STRIP_PATTERN.replace(line, "").to_string());
}
return Some(line.to_string());
}
None
}
fn process_output_start(&mut self, line: &str) {
self.section = Section::Output;
if !self.skip_code_block {
let output = self.output.as_ref().unwrap_or_else(|| {
panic!("Output must be set before OUTPUT:START, line: {}", line)
});
let indent = get_indent(line);
self.new_lines.push(line.to_string());
self.new_lines.push(format!("{}{}", indent, WARNING));
for ol in output {
let trimmed = ol.trim_end();
if trimmed.is_empty() {
self.new_lines.push(String::new());
} else {
self.new_lines.push(format!("{}{}", indent, trimmed));
}
}
} else {
self.original_output.push(line.to_string());
}
}
/// Handles an output-end marker line and returns to [`Section::Normal`].
///
/// If the block was being skipped, the original output collected so far is
/// moved back into `new_lines` and the skip flag is reset; otherwise the
/// collected original output is discarded. The marker line itself is always
/// emitted and the pending execution output is dropped.
fn process_output_end(&mut self, line: &str) {
    let keep_existing = self.skip_code_block;
    self.skip_code_block = false;
    if keep_existing {
        // `append` drains `original_output`, leaving it empty.
        self.new_lines.append(&mut self.original_output);
    } else {
        self.original_output.clear();
    }
    self.new_lines.push(line.to_string());
    self.output = None;
    self.section = Section::Normal;
}
/// Returns `line` without the leading `self.indent`, or unchanged when the
/// line does not start with it (an empty indent removes nothing).
fn strip_indent(&self, line: &str) -> String {
    line.strip_prefix(self.indent.as_str())
        .unwrap_or(line)
        .to_string()
}
/// Handles one line inside a comment-style code block.
///
/// Until the end marker is reached, whatever `remove_md_comment` yields for
/// the line (if anything) is added to `code`. At the end marker the block is
/// executed — unless it is being skipped — as Python or Bash depending on the
/// current section, and the per-block state is reset.
///
/// # Errors
///
/// Propagates any error from executing the block; the per-block state is not
/// reset in that case.
fn process_comment_code(&mut self, line: &str) -> Result<()> {
    if !is_code_comment_end(line) {
        // An `Option` extends the vector by zero or one element.
        self.code.extend(remove_md_comment(line));
        return Ok(());
    }

    if !self.skip_code_block {
        let language = match self.section {
            Section::CodeCommentBash => Language::Bash,
            Section::CodeCommentPython => Language::Python,
            // Only reached from the two comment sections.
            _ => unreachable!(),
        };
        self.execute_current_block(language)?;
    }

    self.indent.clear();
    self.backtick_options = BacktickOptions::default();
    self.code.clear();
    self.section = Section::Normal;
    Ok(())
}
/// Handles one line inside a fenced code block.
///
/// Until the closing fence is reached, the line is stored in `code` with the
/// block's indentation removed. At the closing fence the block is executed —
/// unless it is being skipped — and the per-block state is reset.
///
/// # Errors
///
/// Returns an error when the fence language is not recognised by
/// `Language::parse` and no `filename` option was given, and propagates any
/// error from executing the block. The per-block state is not reset when an
/// error is returned.
fn process_backtick_code(&mut self, line: &str) -> Result<()> {
    if !is_code_backticks_end(line) {
        let stripped = self.strip_indent(line);
        self.code.push(stripped);
        return Ok(());
    }

    if !self.skip_code_block {
        // Cloned so the path does not keep `self` borrowed across the
        // `&mut self` call below.
        let output_file = self.backtick_options.filename.clone();
        match Language::parse(&self.backtick_options.language) {
            Some(lang) => {
                self.execute_current_block_with_file(lang, output_file.as_deref())?;
            }
            None => match output_file.as_deref() {
                // The option users write on the fence line is `filename=…`,
                // so the message names that key.
                None => bail!("Specify 'filename' for non-Python/Bash languages."),
                Some(path) => {
                    // NOTE(review): `Language::Python` is passed although the
                    // fence language was not recognised — presumably
                    // `execute_code` only writes the file when a path is
                    // given; confirm against `execute_code`.
                    self.output = Some(execute_code(
                        &self.code,
                        Language::Python,
                        Some(path),
                        self.verbose,
                    )?);
                }
            },
        }
    }

    self.section = Section::Normal;
    self.code.clear();
    self.backtick_options = BacktickOptions::default();
    self.indent.clear();
    Ok(())
}
/// Executes the collected code as `language` without an output file by
/// forwarding to `execute_current_block_with_file` with `None`.
fn execute_current_block(&mut self, language: Language) -> Result<()> {
self.execute_current_block_with_file(language, None)
}
/// Executes the collected code and stores the resulting lines in `self.output`.
///
/// * With an `output_file`, the block is passed to `execute_code` as is.
/// * Python without an output file: the block is recorded in `python_blocks`
///   and every recorded block is passed to `execute_code` in order, with a
///   `print` of a unique marker inserted right before the newest block. Only
///   the lines following the first marker line are kept, and lines equal to
///   the marker are dropped.
/// * Any other language without an output file: the block is executed alone.
///
/// # Errors
///
/// Propagates any error returned by `execute_code`.
fn execute_current_block_with_file(
    &mut self,
    language: Language,
    output_file: Option<&std::path::Path>,
) -> Result<()> {
    let produced = if output_file.is_some() {
        execute_code(&self.code, language, output_file, self.verbose)?
    } else if language == Language::Python {
        self.python_blocks.push(self.code.clone());
        let block_count = self.python_blocks.len();
        let marker = format!("__MCR_MARKER_{}__", block_count);

        // Earlier blocks first, then the marker print, then the newest block.
        let (earlier, newest) = self.python_blocks.split_at(block_count - 1);
        let script: Vec<String> = earlier
            .iter()
            .flatten()
            .cloned()
            .chain(std::iter::once(format!("print('{}')", marker)))
            .chain(newest.iter().flatten().cloned())
            .collect();

        execute_code(&script, Language::Python, None, self.verbose)?
            .into_iter()
            .skip_while(|printed| *printed != marker)
            .filter(|printed| *printed != marker)
            .collect::<Vec<String>>()
    } else {
        execute_code(&self.code, language, None, self.verbose)?
    };
    self.output = Some(produced);
    Ok(())
}
}
/// Processes a markdown document given as lines and returns the resulting lines.
///
/// When `execute` is false the input is returned as an unchanged copy.
/// Otherwise each line is fed, in order, to a fresh [`ProcessingState`]
/// created with `backtick_standardize` and `verbose`. With `verbose` set,
/// every line is echoed to stderr together with its zero-based index before
/// it is processed.
///
/// # Errors
///
/// Returns the first error reported while processing a line.
pub fn process_markdown(
    content: &[String],
    verbose: bool,
    backtick_standardize: bool,
    execute: bool,
) -> Result<Vec<String>> {
    if !execute {
        return Ok(content.to_vec());
    }

    let mut state = ProcessingState::new(backtick_standardize, verbose);
    content
        .iter()
        .enumerate()
        .try_for_each(|(index, text)| {
            if verbose {
                eprintln!("\x1b[1mline {:4}\x1b[0m: {}", index, text);
            }
            state.process_line(text)
        })?;
    Ok(state.new_lines)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned lines `process_markdown` takes.
    fn to_lines(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|text| text.to_string()).collect()
    }

    /// Language and `filename` are read from the opening fence line.
    #[test]
    fn test_backtick_options_from_line() {
        let with_file =
            BacktickOptions::from_line("```python markdown-code-runner filename=test.py");
        assert_eq!(with_file.language, "python");
        assert_eq!(with_file.filename, Some(PathBuf::from("test.py")));

        let marker_only = BacktickOptions::from_line("```bash markdown-code-runner");
        assert_eq!(marker_only.language, "bash");
        assert!(marker_only.filename.is_none());

        let plain_fence = BacktickOptions::from_line("```python");
        assert_eq!(plain_fence.language, "python");
        assert!(plain_fence.filename.is_none());
    }

    /// A Python block is executed and its output replaces the old output.
    #[test]
    fn test_process_simple_python() {
        let input = to_lines(&[
            "Some text",
            "```python markdown-code-runner",
            "print('Hello, world!')",
            "```",
            "<!-- OUTPUT:START -->",
            "old output",
            "<!-- OUTPUT:END -->",
        ]);
        let rendered = process_markdown(&input, false, false, true).unwrap();
        assert!(rendered.iter().any(|l| l == "Hello, world!"));
        assert!(!rendered.iter().any(|l| l == "old output"));
    }

    /// A skipped block keeps its existing output.
    #[test]
    fn test_process_with_skip() {
        let input = to_lines(&[
            "<!-- CODE:SKIP -->",
            "```python markdown-code-runner",
            "print('Hello, world!')",
            "```",
            "<!-- OUTPUT:START -->",
            "old output",
            "<!-- OUTPUT:END -->",
        ]);
        let rendered = process_markdown(&input, false, false, true).unwrap();
        assert!(rendered.iter().any(|l| l == "old output"));
    }

    /// With `execute` off the document is returned unchanged.
    #[test]
    fn test_process_execute_false() {
        let input = to_lines(&[
            "```python markdown-code-runner",
            "print('Hello')",
            "```",
        ]);
        let rendered = process_markdown(&input, false, false, false).unwrap();
        assert_eq!(input, rendered);
    }
}