use crate::config::WrapMode;
use crate::parser::utils::chunk_options::ChunkOptionValue;
use crate::parser::utils::chunk_options::hashpipe_comment_prefix;
use crate::parser::utils::hashpipe_normalizer::normalize_hashpipe_header;
use crate::syntax::{AstNode, ChunkInfoItem, CodeInfo, SyntaxNode};
use crate::yaml_engine;
/// An option that can be written as a simple hashpipe line:
/// `(normalized key, classified value)`.
type ClassifiedOption = (String, ChunkOptionValue);
/// An inline option that is kept in its original form instead of being
/// converted: `(original key, value, is_quoted)`.
type CstOption = (Option<String>, Option<String>, bool);
/// Kinds of literal values an option may carry and still be written as a
/// simple hashpipe line (consulted by `classify_option_for_hashpipe`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ValueType {
    /// An unquoted boolean literal.
    Boolean,
    /// An unquoted numeric literal.
    Numeric,
    /// A string: accepted when quoted, or unquoted if it passes the
    /// bareword check.
    String,
    /// A string that is accepted only in its quoted form.
    QuotedStringOnly,
}
const HASHPIPE_SAFE_OPTIONS: &[(&str, &[ValueType])] = &[
("label", &[ValueType::QuotedStringOnly]), ("eval", &[ValueType::Boolean]), ("echo", &[ValueType::Boolean]), ("results", &[ValueType::String]), ("collapse", &[ValueType::Boolean]),
("warning", &[ValueType::Boolean]), ("message", &[ValueType::Boolean]),
("error", &[ValueType::Boolean]), ("include", &[ValueType::Boolean]),
("strip-white", &[ValueType::Boolean]),
("comment", &[ValueType::String]),
("highlight", &[ValueType::Boolean]),
("prompt", &[ValueType::Boolean]),
("size", &[ValueType::String]), ("background", &[ValueType::String]), ("cache", &[ValueType::Boolean]), ("cache-path", &[ValueType::String]),
("cache-lazy", &[ValueType::Boolean]),
("cache-comments", &[ValueType::Boolean]),
("cache-rebuild", &[ValueType::Boolean]),
("autodep", &[ValueType::Boolean]),
("fig-path", &[ValueType::String]),
("fig-keep", &[ValueType::String]), ("fig-show", &[ValueType::String]), ("dev", &[ValueType::String]), ("fig-width", &[ValueType::Numeric]),
("fig-height", &[ValueType::Numeric]),
("fig-asp", &[ValueType::Numeric]), ("fig-dim", &[ValueType::Numeric]), ("out-width", &[ValueType::String]), ("out-height", &[ValueType::String]),
("fig-align", &[ValueType::String]), ("fig-env", &[ValueType::String]),
("fig-pos", &[ValueType::String]),
("fig-scap", &[ValueType::String]),
("fig-cap", &[ValueType::String]),
("fig-alt", &[ValueType::String]),
("fig-subcap", &[ValueType::String]),
("dpi", &[ValueType::Numeric]),
("aniopts", &[ValueType::String]),
("ffmpeg-format", &[ValueType::String]),
("code-fold", &[ValueType::Boolean, ValueType::String]), ("code-summary", &[ValueType::String]),
("code-overflow", &[ValueType::String]), ("code-line-numbers", &[ValueType::Boolean]),
("classes", &[ValueType::String]),
];
/// Explicit `(dotted name, dashed name)` pairs consulted by
/// `normalize_option_name` before it falls back to replacing dots generically.
const OPTION_NAME_OVERRIDES: &[(&str, &str)] = &[
    // Figure captions and alternative text.
    ("fig.cap", "fig-cap"),
    ("fig.alt", "fig-alt"),
    // Figure geometry and placement.
    ("fig.width", "fig-width"),
    ("fig.height", "fig-height"),
    ("fig.align", "fig-align"),
    ("fig.pos", "fig-pos"),
    ("fig.env", "fig-env"),
    // Remaining `fig.*` options.
    ("fig.scap", "fig-scap"),
    ("fig.lp", "fig-lp"),
    ("fig.subcap", "fig-subcap"),
    ("fig.ncol", "fig-ncol"),
    ("fig.sep", "fig-sep"),
    ("fig.process", "fig-process"),
    ("fig.show", "fig-show"),
    ("fig.keep", "fig-keep"),
    // `out.*` options.
    ("out.width", "out-width"),
    ("out.height", "out-height"),
    ("out.extra", "out-extra"),
];
/// Returns the hashpipe comment prefix used for `language`, or `None` when
/// no prefix is known for it.
///
/// Thin wrapper that delegates to `hashpipe_comment_prefix`.
pub fn get_comment_prefix(language: &str) -> Option<&'static str> {
hashpipe_comment_prefix(language)
}
/// Converts a dotted option name to its dash-separated form.
///
/// An exact match in `OPTION_NAME_OVERRIDES` takes precedence; any other name
/// has every `.` replaced by `-`.
pub fn normalize_option_name(name: &str) -> String {
    OPTION_NAME_OVERRIDES
        .iter()
        .find(|(dotted, _)| *dotted == name)
        .map_or_else(
            || name.replace('.', "-"),
            |(_, dashed)| (*dashed).to_string(),
        )
}
/// Rewrites upper-case boolean literals (`TRUE`/`T`, `FALSE`/`F`) as
/// `true`/`false`; every other value is returned unchanged.
pub fn normalize_value(value: &str) -> String {
    let canonical = if matches!(value, "TRUE" | "T") {
        "true"
    } else if matches!(value, "FALSE" | "F") {
        "false"
    } else {
        value
    };
    canonical.to_string()
}
/// Gathers chunk options from the code-info node and from hashpipe lines at
/// the top of the chunk `content`, split into simple and complex options.
///
/// * Consecutive label items are joined with a single space and stored under
///   the `label` key.
/// * Inline options are keyed by their normalized name; a later inline option
///   replaces an earlier entry with the same key. Options accepted by
///   `classify_option_for_hashpipe` become simple entries, the rest are kept
///   as complex entries with their original key and value.
/// * Options parsed from existing hashpipe lines in `content` (using `prefix`)
///   are only added when their key is not already present.
///
/// Returns `((simple, complex), had_content_hashpipe)`, where the flag tells
/// whether hashpipe options were found in `content`. When `info_node` cannot
/// be cast to `CodeInfo`, both lists are empty and the flag is `false`.
pub fn split_options_from_cst_with_content(
    info_node: &SyntaxNode,
    content: &str,
    prefix: &str,
) -> ((Vec<ClassifiedOption>, Vec<CstOption>), bool) {
    #[derive(Clone)]
    enum Entry {
        Simple(ClassifiedOption),
        Complex(CstOption),
    }

    /// Insertion-ordered list of `(normalized key, entry)`.
    type Entries = Vec<(String, Entry)>;

    /// Replaces the entry stored under `normalized_key`, or appends a new one.
    fn upsert(entries: &mut Entries, normalized_key: String, entry: Entry) {
        let slot = entries
            .iter()
            .position(|(existing, _)| *existing == normalized_key);
        match slot {
            Some(index) => entries[index] = (normalized_key, entry),
            None => entries.push((normalized_key, entry)),
        }
    }

    /// Stores any collected label words as the `label` option and resets them.
    fn flush_label(entries: &mut Entries, label_parts: &mut Vec<String>) {
        if label_parts.is_empty() {
            return;
        }
        let label = label_parts.join(" ");
        label_parts.clear();
        upsert(
            entries,
            "label".to_string(),
            Entry::Simple(("label".to_string(), ChunkOptionValue::Simple(label))),
        );
    }

    /// Records an inline `key=value` option, classifying it when possible.
    fn record_inline_option(entries: &mut Entries, key: String, value: String, is_quoted: bool) {
        let normalized_key = normalize_option_name(&key);
        let entry = match classify_option_for_hashpipe(&normalized_key, &value, is_quoted) {
            Some(classified) => Entry::Simple((normalized_key.clone(), classified)),
            None => Entry::Complex((Some(key), Some(value), is_quoted)),
        };
        upsert(entries, normalized_key, entry);
    }

    /// Records an option read from existing hashpipe lines; it never replaces
    /// an entry that is already present, and its value is stored verbatim.
    fn record_content_option(entries: &mut Entries, key: String, value: String) {
        let normalized_key = normalize_option_name(&key);
        if entries.iter().any(|(existing, _)| *existing == normalized_key) {
            return;
        }
        let entry = Entry::Simple((normalized_key.clone(), ChunkOptionValue::Simple(value)));
        entries.push((normalized_key, entry));
    }

    let Some(info) = CodeInfo::cast(info_node.clone()) else {
        return ((Vec::new(), Vec::new()), false);
    };

    let mut entries: Entries = Vec::new();
    let mut label_parts: Vec<String> = Vec::new();

    for item in info.chunk_items() {
        match item {
            ChunkInfoItem::Label(label) => {
                let text = label.text();
                if !text.is_empty() {
                    label_parts.push(text);
                }
            }
            ChunkInfoItem::Option(opt) => {
                // The label ends where the first option starts.
                flush_label(&mut entries, &mut label_parts);
                if let (Some(key), Some(value)) = (opt.key(), opt.value()) {
                    record_inline_option(&mut entries, key, value, opt.is_quoted());
                }
            }
        }
    }
    flush_label(&mut entries, &mut label_parts);

    let mut had_content_hashpipe = false;
    if let Some(normalized) = normalize_hashpipe_header(content, prefix)
        && let Some(options) = extract_options_from_normalized_yaml(&normalized.normalized_yaml)
    {
        had_content_hashpipe = true;
        for (key, value) in options {
            record_content_option(&mut entries, key, value);
        }
    }

    let mut simple: Vec<ClassifiedOption> = Vec::new();
    let mut complex: Vec<CstOption> = Vec::new();
    for (_, entry) in entries {
        match entry {
            Entry::Simple(option) => simple.push(option),
            Entry::Complex(option) => complex.push(option),
        }
    }

    ((simple, complex), had_content_hashpipe)
}
/// Parses `normalized_yaml` and returns the `(key, value text)` pairs of the
/// block mapping in its first document, in document order.
///
/// Returns `None` when the text does not parse, when the first document does
/// not hold a block mapping, or when any entry's key cannot be read as a
/// scalar.
fn extract_options_from_normalized_yaml(normalized_yaml: &str) -> Option<Vec<(String, String)>> {
    let tree = yaml_parser::parse(normalized_yaml).ok()?;
    let root = yaml_parser::ast::Root::cast(tree)?;
    let first_document = root.documents().next()?;
    let map = first_document.block()?.block_map()?;

    // Collecting into `Option<Vec<_>>` stops at the first unreadable key.
    map.entries()
        .map(|entry| {
            let key = hashpipe_map_entry_key(&entry)?;
            Some((key, hashpipe_map_entry_value_text(normalized_yaml, &entry)))
        })
        .collect()
}
/// Returns the scalar text of a map entry's key, or `None` when the entry has
/// no key or the key is not a plain or quoted scalar.
///
/// A flow key is read directly; otherwise the first flow node nested in the
/// key's block is used.
fn hashpipe_map_entry_key(entry: &yaml_parser::ast::BlockMapEntry) -> Option<String> {
    let key = entry.key()?;
    let flow = match key.flow() {
        Some(flow) => flow,
        None => hashpipe_block_to_flow_scalar(&key.block()?)?,
    };
    hashpipe_flow_scalar_text(&flow)
}
/// Returns the source text of a map entry's value.
///
/// * No value: the empty string.
/// * Flow value that is a plain or quoted scalar: the token text.
/// * Any other flow value: its source slice, trimmed.
/// * Block value (or anything else): its source slice, untrimmed.
///
/// Slices are taken from `normalized_yaml` using the node's text range, so
/// `entry` is expected to come from a parse of that same string.
fn hashpipe_map_entry_value_text(
    normalized_yaml: &str,
    entry: &yaml_parser::ast::BlockMapEntry,
) -> String {
    let Some(value) = entry.value() else {
        return String::new();
    };

    if let Some(flow) = value.flow() {
        if let Some(scalar) = hashpipe_flow_value_text(&flow) {
            return scalar;
        }
        let span = flow.syntax().text_range();
        let from: usize = span.start().into();
        let to: usize = span.end().into();
        return normalized_yaml[from..to].trim().to_string();
    }

    // Block values and the generic fallback are both copied verbatim.
    let span = match value.block() {
        Some(block) => block.syntax().text_range(),
        None => value.syntax().text_range(),
    };
    let from: usize = span.start().into();
    let to: usize = span.end().into();
    normalized_yaml[from..to].to_string()
}
/// Returns the first direct child of `block` that can be cast to a flow node,
/// if there is one.
fn hashpipe_block_to_flow_scalar(
    block: &yaml_parser::ast::Block,
) -> Option<yaml_parser::ast::Flow> {
    let mut children = block.syntax().children();
    children.find_map(|child| yaml_parser::ast::Flow::cast(child))
}
/// Returns the text of a plain, single-quoted or double-quoted scalar with
/// its surrounding quote delimiters removed, or `None` when `flow` is not
/// one of those scalar kinds.
///
/// Exactly one delimiter is removed from each end, so quote characters that
/// belong to the content are kept (`'''a'` yields `''a`, not `a`). Escape
/// sequences inside the scalar are not decoded.
fn hashpipe_flow_scalar_text(flow: &yaml_parser::ast::Flow) -> Option<String> {
    /// Removes at most one `quote` from the start and one from the end.
    fn strip_delimiters(text: &str, quote: char) -> &str {
        let text = text.strip_prefix(quote).unwrap_or(text);
        text.strip_suffix(quote).unwrap_or(text)
    }

    let token = flow
        .plain_scalar()
        .or_else(|| flow.single_quoted_scalar())
        .or_else(|| flow.double_qouted_scalar())?;

    let kind = token.kind();
    let raw = token.text();
    let unquoted = if kind == yaml_parser::SyntaxKind::SINGLE_QUOTED_SCALAR {
        strip_delimiters(raw, '\'')
    } else if kind == yaml_parser::SyntaxKind::DOUBLE_QUOTED_SCALAR {
        strip_delimiters(raw, '"')
    } else {
        raw
    };
    Some(unquoted.to_string())
}
/// Returns the token text of a plain, single-quoted or double-quoted scalar
/// exactly as written (quotes included), or `None` for any other flow node.
fn hashpipe_flow_value_text(flow: &yaml_parser::ast::Flow) -> Option<String> {
    flow.plain_scalar()
        .or_else(|| flow.single_quoted_scalar())
        .or_else(|| flow.double_qouted_scalar())
        .map(|token| token.text().to_string())
}
/// Reports whether `value`, ignoring surrounding whitespace, is a block
/// scalar header: `|` or `>` followed only by `+`, `-` or ASCII digits.
fn is_yaml_block_scalar_indicator(value: &str) -> bool {
    let mut rest = value.trim().chars();
    let has_style_marker = matches!(rest.next(), Some('|') | Some('>'));
    has_style_marker && rest.all(|ch| matches!(ch, '+' | '-') || ch.is_ascii_digit())
}
fn classify_option_for_hashpipe(
key: &str,
value: &str,
is_quoted: bool,
) -> Option<ChunkOptionValue> {
use crate::parser::utils::chunk_options::{is_boolean_literal, is_numeric_literal};
let allowed_types = HASHPIPE_SAFE_OPTIONS
.iter()
.find(|(name, _)| *name == key)
.map(|(_, types)| *types)?;
if is_quoted {
if allowed_types.contains(&ValueType::String)
|| allowed_types.contains(&ValueType::QuotedStringOnly)
{
return Some(ChunkOptionValue::Simple(format!("\"{}\"", value)));
}
} else {
if allowed_types.contains(&ValueType::Boolean)
&& (is_boolean_literal(value) || matches!(value, "true" | "false"))
{
return Some(ChunkOptionValue::Simple(value.to_ascii_lowercase()));
}
if is_numeric_literal(value) && allowed_types.contains(&ValueType::Numeric) {
return Some(ChunkOptionValue::Simple(value.to_string()));
}
if allowed_types.contains(&ValueType::String) && is_simple_bareword(value) {
return Some(ChunkOptionValue::Simple(value.to_string()));
}
}
None
}
/// Reports whether an unquoted value may be emitted verbatim as a string.
///
/// Always returns `false`, so `classify_option_for_hashpipe` never treats an
/// unquoted, non-literal value as a simple string.
fn is_simple_bareword(_s: &str) -> bool {
    false
}
pub fn format_hashpipe_option_with_wrap(
prefix: &str,
key: &str,
value: &str,
line_width: usize,
) -> Vec<String> {
fn floor_char_boundary(s: &str, max: usize) -> usize {
let mut idx = max.min(s.len());
while idx > 0 && !s.is_char_boundary(idx) {
idx -= 1;
}
idx
}
if let Some((first, rest)) = value.split_once('\n')
&& is_yaml_block_scalar_indicator(first)
{
let mut lines = vec![format!("{} {}: {}", prefix, key, first)];
lines.extend(rest.split('\n').map(|line| format!("{}{}", prefix, line)));
return lines;
}
if let Some((first, rest)) = value.split_once('\n') {
let mut lines = vec![if first.is_empty() {
format!("{} {}:", prefix, key)
} else {
format!("{} {}: {}", prefix, key, first)
}];
lines.extend(rest.split('\n').map(|line| format!("{} {}", prefix, line)));
return lines;
}
let first_line = format!("{} {}: {}", prefix, key, value);
if first_line.len() <= line_width {
return vec![first_line];
}
let first_prefix = format!("{} {}: ", prefix, key);
let available_first = line_width.saturating_sub(first_prefix.len());
if available_first < 10 {
return vec![first_line];
}
let continuation_prefix = format!("{} ", prefix); let available_continuation = line_width.saturating_sub(continuation_prefix.len());
let mut lines = Vec::new();
let mut remaining = value;
let mut is_first = true;
while !remaining.is_empty() {
let available = if is_first {
available_first
} else {
available_continuation
};
let break_point = if remaining.len() <= available {
remaining.len()
} else {
let upper = floor_char_boundary(remaining, available);
if upper == 0 {
remaining
.char_indices()
.nth(1)
.map(|(i, _)| i)
.unwrap_or(remaining.len())
} else {
remaining[..upper]
.rfind(' ')
.map(|i| i + 1) .unwrap_or(upper) }
};
let chunk = &remaining[..break_point].trim_end();
if is_first {
lines.push(format!("{}{}", first_prefix, chunk));
is_first = false;
} else {
lines.push(format!("{}{}", continuation_prefix, chunk));
}
remaining = remaining[break_point..].trim_start();
}
lines
}
/// Renders simple chunk options as hashpipe comment lines for `language`.
///
/// Returns `None` when `language` has no hashpipe comment prefix. Otherwise:
///
/// 1. Every `ChunkOptionValue::Simple` option is normalized (key via
///    `normalize_option_name`, value via `normalize_value`, empty values
///    become `true`); other option kinds are skipped.
/// 2. The options are assembled into a YAML document and formatted by the
///    YAML engine with the width reduced by the prefix and its separating
///    space, honouring `wrap`. Each resulting line is emitted behind the
///    prefix (empty lines become the bare prefix).
/// 3. If the YAML engine fails, each option is formatted individually with
///    `format_hashpipe_option_with_wrap` instead.
///
/// The per-option fallback is built only when it is actually needed.
pub fn format_as_hashpipe(
    language: &str,
    options: &[ClassifiedOption],
    line_width: usize,
    wrap: Option<&WrapMode>,
) -> Option<Vec<String>> {
    let prefix = get_comment_prefix(language)?;

    let yaml_entries: Vec<(String, String)> = options
        .iter()
        .filter_map(|(key, value)| {
            let ChunkOptionValue::Simple(raw) = value else {
                return None;
            };
            let normalized = normalize_value(raw);
            let rendered = if normalized.is_empty() {
                "true".to_string()
            } else {
                normalized
            };
            Some((normalize_option_name(key), rendered))
        })
        .collect();

    if yaml_entries.is_empty() {
        return Some(Vec::new());
    }

    let yaml_text: String = yaml_entries
        .iter()
        .map(|(key, value)| {
            // Values that start on a new line are attached directly to the
            // key; every entry must end with exactly one trailing newline.
            match (value.starts_with('\n'), value.ends_with('\n')) {
                (true, true) => format!("{key}:{value}"),
                (true, false) => format!("{key}:{value}\n"),
                (false, _) => format!("{key}: {value}\n"),
            }
        })
        .collect();

    // Leave room for the prefix and the space that follows it.
    let yaml_print_width = line_width.saturating_sub(prefix.len() + 1);
    let yaml_config = crate::config::Config {
        line_width: yaml_print_width,
        wrap: wrap.cloned(),
        ..Default::default()
    };

    if let Ok(formatted_yaml) = yaml_engine::format_yaml_with_config(&yaml_text, &yaml_config) {
        let lines = formatted_yaml
            .lines()
            .map(|line| {
                if line.is_empty() {
                    prefix.to_string()
                } else {
                    format!("{} {}", prefix, line)
                }
            })
            .collect::<Vec<_>>();
        return Some(lines);
    }

    // The YAML engine rejected the text: fall back to per-option wrapping.
    Some(
        yaml_entries
            .iter()
            .flat_map(|(key, value)| {
                format_hashpipe_option_with_wrap(prefix, key, value, line_width)
            })
            .collect(),
    )
}
// Unit tests for comment-prefix lookup, option name/value normalization and
// hashpipe line formatting.
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::utils::chunk_options::ChunkOptionValue;
// Prefix lookup: language names are matched regardless of case.
#[test]
fn test_get_comment_prefix_r() {
assert_eq!(get_comment_prefix("r"), Some("#|"));
assert_eq!(get_comment_prefix("R"), Some("#|"));
}
#[test]
fn test_get_comment_prefix_python() {
assert_eq!(get_comment_prefix("python"), Some("#|"));
assert_eq!(get_comment_prefix("Python"), Some("#|"));
}
#[test]
fn test_get_comment_prefix_cpp() {
assert_eq!(get_comment_prefix("cpp"), Some("//|"));
assert_eq!(get_comment_prefix("c++"), Some("//|"));
assert_eq!(get_comment_prefix("C++"), Some("//|"));
}
#[test]
fn test_get_comment_prefix_sql() {
assert_eq!(get_comment_prefix("sql"), Some("--|"));
assert_eq!(get_comment_prefix("SQL"), Some("--|"));
}
// Languages without a known prefix yield `None`.
#[test]
fn test_get_comment_prefix_unknown() {
assert_eq!(get_comment_prefix("unknown"), None);
assert_eq!(get_comment_prefix("fortran"), None);
assert_eq!(get_comment_prefix("matlab"), None);
}
// Option name normalization: explicit overrides, generic dot replacement,
// and names that need no change.
#[test]
fn test_normalize_option_name_override() {
assert_eq!(normalize_option_name("fig.cap"), "fig-cap");
assert_eq!(normalize_option_name("fig.width"), "fig-width");
}
#[test]
fn test_normalize_option_name_default() {
assert_eq!(normalize_option_name("my.option"), "my-option");
assert_eq!(normalize_option_name("some.long.name"), "some-long-name");
}
#[test]
fn test_normalize_option_name_no_dots() {
assert_eq!(normalize_option_name("echo"), "echo");
assert_eq!(normalize_option_name("warning"), "warning");
}
// Value normalization: upper-case boolean literals become `true`/`false`,
// everything else passes through.
#[test]
fn test_normalize_value_booleans() {
assert_eq!(normalize_value("TRUE"), "true");
assert_eq!(normalize_value("FALSE"), "false");
assert_eq!(normalize_value("T"), "true");
assert_eq!(normalize_value("F"), "false");
}
#[test]
fn test_normalize_value_other() {
assert_eq!(normalize_value("7"), "7");
assert_eq!(normalize_value("\"hello\""), "\"hello\"");
assert_eq!(normalize_value("3.14"), "3.14");
}
// Single-option formatting.
#[test]
fn test_format_hashpipe_option_short() {
let lines = format_hashpipe_option_with_wrap("#|", "echo", "true", 80);
assert_eq!(lines.len(), 1);
assert_eq!(lines[0], "#| echo: true");
}
// A value longer than the line width is split over several lines.
#[test]
fn test_format_hashpipe_option_wrap() {
let long_caption =
"This is a very long caption that definitely exceeds the line width and needs to wrap";
let lines = format_hashpipe_option_with_wrap("#|", "fig-cap", long_caption, 80);
assert!(lines.len() > 1, "Should wrap into multiple lines");
assert!(lines[0].starts_with("#| fig-cap:"));
assert!(lines[1].starts_with("#| ")); assert!(lines[0].len() <= 80);
}
// Wrapping must not split a multi-byte character (the value contains
// typographic apostrophes).
#[test]
fn test_format_hashpipe_option_wrap_handles_utf8_boundaries() {
let value = "comparison data for three methods:- Student’s t, Bayes factor, and Welch’s t.";
let lines = format_hashpipe_option_with_wrap("#|", "fig-cap", value, 60);
assert!(lines.len() > 1, "Should wrap into multiple lines");
assert!(lines[0].starts_with("#| fig-cap:"));
assert!(lines[1].starts_with("#| "));
}
// Block scalar values keep their lines instead of being re-wrapped.
#[test]
fn test_format_hashpipe_option_block_scalar() {
let value = "|\n A caption\n spanning lines";
let lines = format_hashpipe_option_with_wrap("#|", "fig-cap", value, 80);
assert_eq!(
lines,
vec!["#| fig-cap: |", "#| A caption", "#| spanning lines"]
);
}
// Multi-line values that start with a newline put the key on its own line.
// NOTE(review): the formatter emits these continuation lines as prefix +
// space + line, which for the input below appears to give two spaces before
// `-`; confirm the whitespace in the input and expected strings.
#[test]
fn test_format_hashpipe_option_indented_yaml_multiline() {
let value = "\n - a\n - b";
let lines = format_hashpipe_option_with_wrap("#|", "list", value, 80);
assert_eq!(lines, vec!["#| list:", "#| - a", "#| - b"]);
}
// Whole-chunk formatting: names and values are normalized on output.
#[test]
fn test_format_as_hashpipe_simple() {
let options = vec![
(
"echo".to_string(),
ChunkOptionValue::Simple("TRUE".to_string()),
),
(
"fig.width".to_string(),
ChunkOptionValue::Simple("7".to_string()),
),
];
let lines = format_as_hashpipe("r", &options, 80, None).unwrap();
assert_eq!(lines.len(), 2);
assert_eq!(lines[0], "#| echo: true");
assert_eq!(lines[1], "#| fig-width: 7");
}
// Options whose value is an expression are left out of the output.
#[test]
fn test_format_as_hashpipe_skips_expressions() {
let options = vec![
(
"echo".to_string(),
ChunkOptionValue::Simple("TRUE".to_string()),
),
(
"label".to_string(),
ChunkOptionValue::Expression("my_var".to_string()),
),
];
let lines = format_as_hashpipe("r", &options, 80, None).unwrap();
assert_eq!(lines.len(), 1); assert_eq!(lines[0], "#| echo: true");
}
// A language without a comment prefix cannot be formatted at all.
#[test]
fn test_format_as_hashpipe_unknown_language() {
let options = vec![(
"echo".to_string(),
ChunkOptionValue::Simple("TRUE".to_string()),
)];
let result = format_as_hashpipe("fortran", &options, 80, None);
assert!(result.is_none());
}
}