use pest::Parser;
use pest_derive::Parser;
use smallvec::SmallVec;
use super::{PadDirection, RangeSpec, SortDirection, StringOp, TrimDirection};
use super::template::TemplateSection;
/// Separator used by the shorthand index/range operations, which are turned
/// into a `Split` on a single space.
const SPACE_SEP: &str = " ";
// Parser type for template strings. The `Parser` derive reads the grammar file
// named in the `grammar` attribute below; the `Rule` values matched throughout
// this file come from that grammar.
#[derive(Parser)]
#[grammar = "pipeline/template.pest"]
struct TemplateParser;
/// Parses a single template string into its list of operations.
///
/// Returns the operations in the order they appear, together with a flag that
/// is `true` when the parse tree contains a debug-flag node.
///
/// # Errors
///
/// Returns `Err` with a `Parse error: ...` message when the grammar rejects
/// `template`, or the error produced while converting an individual operation.
pub fn parse_template(template: &str) -> Result<(Vec<StringOp>, bool), String> {
    let root = TemplateParser::parse(Rule::template, template)
        .map_err(|e| format!("Parse error: {e}"))?
        .next()
        .unwrap();

    // Rough pre-allocation based only on the template's length in bytes.
    let capacity_hint = match template.len() {
        0..=49 => 4,
        50..=149 => 8,
        _ => 16,
    };
    let mut ops = Vec::with_capacity(capacity_hint);
    let mut debug = false;

    for node in root.into_inner() {
        match node.as_rule() {
            Rule::operation_list => {
                for wrapper in node.into_inner() {
                    // Each list entry wraps the concrete operation node.
                    let operation = wrapper.into_inner().next().unwrap();
                    ops.push(parse_operation(operation)?);
                }
            }
            Rule::debug_flag => debug = true,
            _ => {}
        }
    }

    Ok((ops, debug))
}
/// Splits a mixed string into literal text and `{...}` template sections.
///
/// Text outside braces is collected into `TemplateSection::Literal` values.
/// Each brace-delimited span is re-wrapped in `{}` and handed to
/// [`parse_template`]; its operations become a section via
/// `TemplateSection::from_ops`. A `{` that directly follows a `$` in the
/// pending literal (as in `${VAR}`) is copied through to its matching `}` as
/// literal text instead of being parsed.
///
/// The returned flag is `true` if any parsed section reported its debug flag.
///
/// # Errors
///
/// Returns `Err` when a brace is never closed, or when a section fails to
/// parse.
pub fn parse_template_sections(template: &str) -> Result<(Vec<TemplateSection>, bool), String> {
    let mut sections = Vec::new();
    let mut literal = String::new();
    let mut debug = false;
    let mut rest = template.chars();

    while let Some(ch) = rest.next() {
        if ch != '{' {
            literal.push(ch);
            continue;
        }

        if literal.ends_with('$') {
            // `${...}`: keep the whole braced span, braces included, as literal text.
            literal.push(ch);
            let mut depth = 1;
            for c in rest.by_ref() {
                literal.push(c);
                match c {
                    '{' => depth += 1,
                    '}' => {
                        depth -= 1;
                        if depth == 0 {
                            break;
                        }
                    }
                    _ => {}
                }
            }
            if depth > 0 {
                return Err("Unclosed shell variable brace".to_string());
            }
            continue;
        }

        // A template section starts here; flush any literal collected so far.
        if !literal.is_empty() {
            sections.push(TemplateSection::Literal(std::mem::take(&mut literal)));
        }

        // Collect everything up to the matching `}` (exclusive).
        // NOTE(review): brace counting does not look at backslash escapes, so an
        // escaped `\}` inside a section still closes a level - confirm intended.
        let mut depth = 1;
        let mut body = String::new();
        for c in rest.by_ref() {
            match c {
                '{' => depth += 1,
                '}' => {
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                _ => {}
            }
            body.push(c);
        }
        if depth > 0 {
            return Err("Unclosed template brace".to_string());
        }

        let wrapped = format!("{{{body}}}");
        let (ops, section_debug) = parse_template(&wrapped)?;
        debug |= section_debug;
        sections.push(TemplateSection::from_ops(ops));
    }

    if !literal.is_empty() {
        sections.push(TemplateSection::Literal(literal));
    }

    Ok((sections, debug))
}
/// Deprecated alias for [`parse_template_sections`].
///
/// Forwards `template` unchanged and returns that function's result as-is.
#[allow(dead_code)]
#[deprecated(
since = "0.14.0",
note = "use `parse_template_sections` instead; `parse_multi_template` will be removed in the next major release"
)]
pub fn parse_multi_template(template: &str) -> Result<(Vec<TemplateSection>, bool), String> {
// Pure delegation: no extra processing happens here.
parse_template_sections(template)
}
/// Converts one parsed operation node into the matching [`StringOp`].
///
/// Shorthand index/range nodes become a `Split` on a single space. All other
/// rules map to their operation, delegating argument extraction to the helper
/// functions in this file.
///
/// # Errors
///
/// Returns `Err` when a shorthand index cannot be parsed as a number (for
/// example because it does not fit the index type), when a helper reports an
/// error, or when the rule is not one handled here.
fn parse_operation(pair: pest::iterators::Pair<Rule>) -> Result<StringOp, String> {
    match pair.as_rule() {
        Rule::shorthand_range => {
            let range = parse_range_spec(pair)?;
            Ok(StringOp::Split {
                sep: SPACE_SEP.to_string(),
                range,
            })
        }
        Rule::shorthand_index => {
            // The text comes from user input: report an unparsable index as an
            // error instead of panicking.
            let idx_str = pair.as_str();
            let idx = idx_str
                .parse()
                .map_err(|_| format!("Invalid index: {idx_str}"))?;
            Ok(StringOp::Split {
                sep: SPACE_SEP.to_string(),
                range: RangeSpec::Index(idx),
            })
        }
        Rule::split => {
            let mut parts = pair.into_inner();
            let sep_part = parts.next().unwrap();
            let sep = process_arg(sep_part.as_str());
            // Without an explicit range the split keeps every part.
            let range = match parts.next() {
                Some(range_part) => parse_range_spec(range_part)?,
                None => RangeSpec::Range(None, None, false),
            };
            Ok(StringOp::Split { sep, range })
        }
        Rule::join => Ok(StringOp::Join {
            sep: extract_single_arg(pair)?,
        }),
        Rule::substring => Ok(StringOp::Substring {
            range: extract_range_arg(pair)?,
        }),
        Rule::replace => {
            let (pattern, replacement, flags) =
                parse_sed_string(pair.into_inner().next().unwrap())?;
            Ok(StringOp::Replace {
                pattern,
                replacement,
                flags,
            })
        }
        Rule::upper => Ok(StringOp::Upper),
        Rule::lower => Ok(StringOp::Lower),
        Rule::trim => {
            // Both helpers consume the node, so the first one gets a clone.
            let chars = parse_trim_chars(pair.clone());
            let direction = parse_trim_direction(pair);
            Ok(StringOp::Trim { chars, direction })
        }
        Rule::append => Ok(StringOp::Append {
            suffix: extract_single_arg(pair)?,
        }),
        Rule::prepend => Ok(StringOp::Prepend {
            prefix: extract_single_arg(pair)?,
        }),
        // `quote` produces the same operation as `surround`.
        Rule::surround | Rule::quote => Ok(StringOp::Surround {
            text: extract_single_arg(pair)?,
        }),
        Rule::strip_ansi => Ok(StringOp::StripAnsi),
        // Filter patterns are taken verbatim (no escape processing).
        Rule::filter => Ok(StringOp::Filter {
            pattern: extract_single_arg_raw(pair)?,
        }),
        Rule::filter_not => Ok(StringOp::FilterNot {
            pattern: extract_single_arg_raw(pair)?,
        }),
        Rule::slice => Ok(StringOp::Slice {
            range: extract_range_arg(pair)?,
        }),
        Rule::sort => Ok(StringOp::Sort {
            direction: parse_sort_direction(pair),
        }),
        Rule::reverse => Ok(StringOp::Reverse),
        Rule::unique => Ok(StringOp::Unique),
        Rule::pad => parse_pad_operation(pair),
        Rule::regex_extract | Rule::map_regex_extract => parse_regex_extract_operation(pair),
        Rule::map => parse_map_operation(pair),
        other => Err(format!("Unsupported operation: {other:?}")),
    }
}
/// Returns the first child of `pair` as text, with backslash escapes resolved
/// by `process_arg`.
///
/// Panics if `pair` has no child node.
fn extract_single_arg(pair: pest::iterators::Pair<Rule>) -> Result<String, String> {
    let mut children = pair.into_inner();
    let argument = children.next().unwrap();
    let raw_text = argument.as_str();
    Ok(process_arg(raw_text))
}
/// Returns the first child of `pair` as text, copied verbatim (no escape
/// processing).
///
/// Panics if `pair` has no child node.
fn extract_single_arg_raw(pair: pest::iterators::Pair<Rule>) -> Result<String, String> {
    let argument = pair.into_inner().next().unwrap();
    Ok(argument.as_str().to_owned())
}
/// Parses the first child of `pair` as a range specification.
///
/// Panics if `pair` has no child node; otherwise returns whatever
/// `parse_range_spec` returns for that child.
fn extract_range_arg(pair: pest::iterators::Pair<Rule>) -> Result<RangeSpec, String> {
    let range_node = pair.into_inner().next().unwrap();
    parse_range_spec(range_node)
}
/// Extracts the custom character set of a trim operation.
///
/// With no arguments the result is empty. With two arguments the first one is
/// the character set. With a single argument, the words `left`, `right` and
/// `both` are treated as a direction (empty result); any other text is
/// returned as the character set.
#[inline(always)]
fn parse_trim_chars(pair: pest::iterators::Pair<Rule>) -> String {
    let mut args = pair.into_inner();
    let first = match args.next() {
        Some(node) => node,
        None => return String::new(),
    };
    let text = first.as_str();
    let is_only_arg = args.next().is_none();

    // A lone direction keyword carries no character set.
    if is_only_arg && matches!(text, "left" | "right" | "both") {
        String::new()
    } else {
        text.to_string()
    }
}
/// Determines the direction of a trim operation.
///
/// The direction is read from the second argument when there is one,
/// otherwise from the first. `left` and `right` select those directions;
/// anything else - including no arguments at all - yields
/// `TrimDirection::Both`.
fn parse_trim_direction(pair: pest::iterators::Pair<Rule>) -> TrimDirection {
    let mut args = pair.into_inner();
    // Prefer the second argument; fall back to the first when it stands alone.
    let selector = args.next().map(|first| args.next().unwrap_or(first));

    match selector.map(|node| node.as_str()) {
        Some("left") => TrimDirection::Left,
        Some("right") => TrimDirection::Right,
        _ => TrimDirection::Both,
    }
}
/// Determines the direction of a sort operation.
///
/// Only a first argument whose text is exactly `desc` selects descending
/// order; a missing argument or any other text means ascending.
fn parse_sort_direction(pair: pest::iterators::Pair<Rule>) -> SortDirection {
    let descending = pair
        .into_inner()
        .next()
        .map_or(false, |arg| arg.as_str() == "desc");

    if descending {
        SortDirection::Desc
    } else {
        SortDirection::Asc
    }
}
/// Builds a `StringOp::Pad` from a pad node.
///
/// Arguments are read in order: width (required), fill character (optional,
/// defaults to a space), direction (optional, defaults to right). Only the
/// first character of the escape-processed fill argument is used. Unknown
/// direction words fall back to right.
///
/// # Errors
///
/// Returns `Err("Invalid padding width")` when the width text is not a valid
/// number for the width type.
fn parse_pad_operation(pair: pest::iterators::Pair<Rule>) -> Result<StringOp, String> {
    let mut args = pair.into_inner();

    let width_text = args.next().unwrap().as_str();
    let width = width_text.parse().map_err(|_| "Invalid padding width")?;

    let fill = match args.next() {
        Some(arg) => process_arg(arg.as_str()).chars().next().unwrap_or(' '),
        None => ' ',
    };

    let direction = match args.next().map(|arg| arg.as_str()) {
        Some("left") => PadDirection::Left,
        Some("both") => PadDirection::Both,
        // "right", unknown words and a missing argument all pad on the right.
        _ => PadDirection::Right,
    };

    Ok(StringOp::Pad {
        width,
        char: fill,
        direction,
    })
}
/// Builds a `StringOp::RegexExtract` from a regex-extract node.
///
/// The first child is the pattern, copied verbatim. The optional second child
/// is the capture-group number; if it is missing or not parsable the group is
/// `None`.
fn parse_regex_extract_operation(pair: pest::iterators::Pair<Rule>) -> Result<StringOp, String> {
    let mut args = pair.into_inner();
    let pattern = args.next().unwrap().as_str().to_owned();
    let group = match args.next() {
        Some(group_arg) => group_arg.as_str().parse().ok(),
        None => None,
    };
    Ok(StringOp::RegexExtract { pattern, group })
}
/// Builds a `StringOp::Map` from a map node.
///
/// Descends two levels to reach the nested operation list, then converts each
/// entry with `parse_map_inner_operation`, stopping at the first error.
fn parse_map_operation(pair: pest::iterators::Pair<Rule>) -> Result<StringOp, String> {
    let map_body = pair.into_inner().next().unwrap();
    let op_list = map_body.into_inner().next().unwrap();

    let operations = op_list
        .into_inner()
        .map(|entry| {
            // Each list entry wraps the concrete operation node.
            let operation = entry.into_inner().next().unwrap();
            parse_map_inner_operation(operation)
        })
        .collect::<Result<SmallVec<[StringOp; 8]>, String>>()?;

    Ok(StringOp::Map {
        operations: Box::new(operations),
    })
}
/// Converts one operation node found inside a map block into a [`StringOp`].
///
/// Handles the rules listed below; any other rule is rejected with an
/// `Unsupported map operation` error.
fn parse_map_inner_operation(pair: pest::iterators::Pair<Rule>) -> Result<StringOp, String> {
    let rule = pair.as_rule();
    let op = match rule {
        // Operations without arguments.
        Rule::upper => StringOp::Upper,
        Rule::lower => StringOp::Lower,
        Rule::reverse => StringOp::Reverse,
        Rule::strip_ansi => StringOp::StripAnsi,
        Rule::map_unique => StringOp::Unique,

        // Operations taking a range.
        Rule::substring => StringOp::Substring {
            range: extract_range_arg(pair)?,
        },
        Rule::map_slice => StringOp::Slice {
            range: extract_range_arg(pair)?,
        },

        // Operations taking one escape-processed text argument.
        Rule::append => StringOp::Append {
            suffix: extract_single_arg(pair)?,
        },
        Rule::prepend => StringOp::Prepend {
            prefix: extract_single_arg(pair)?,
        },
        Rule::surround | Rule::quote => StringOp::Surround {
            text: extract_single_arg(pair)?,
        },
        Rule::map_join => StringOp::Join {
            sep: extract_single_arg(pair)?,
        },

        // Operations taking one verbatim pattern argument.
        Rule::map_filter => StringOp::Filter {
            pattern: extract_single_arg_raw(pair)?,
        },
        Rule::map_filter_not => StringOp::FilterNot {
            pattern: extract_single_arg_raw(pair)?,
        },

        Rule::map_sort => StringOp::Sort {
            direction: parse_sort_direction(pair),
        },
        Rule::trim => {
            // Both helpers consume the node, so the first one gets a clone.
            let chars = parse_trim_chars(pair.clone());
            let direction = parse_trim_direction(pair);
            StringOp::Trim { chars, direction }
        }
        Rule::replace => {
            let (pattern, replacement, flags) =
                parse_sed_string(pair.into_inner().next().unwrap())?;
            StringOp::Replace {
                pattern,
                replacement,
                flags,
            }
        }
        Rule::map_split => {
            let mut args = pair.into_inner();
            let sep = process_arg(args.next().unwrap().as_str());
            // Without an explicit range the split keeps every part.
            let range = match args.next() {
                Some(range_node) => parse_range_spec(range_node)?,
                None => RangeSpec::Range(None, None, false),
            };
            StringOp::Split { sep, range }
        }

        // Helpers that already return a finished result.
        Rule::pad => return parse_pad_operation(pair),
        Rule::map_regex_extract => return parse_regex_extract_operation(pair),

        other => return Err(format!("Unsupported map operation: {other:?}")),
    };
    Ok(op)
}
/// Resolves backslash escapes in an operation argument.
///
/// `\n`, `\t` and `\r` become newline, tab and carriage return. A backslash
/// followed by any other character yields that character itself (so `\:`,
/// `\|`, `\\`, `\/`, `\{` and `\}` produce the bare character). A trailing
/// lone backslash is kept as-is. Input without any backslash is returned
/// unchanged.
///
/// The string is walked by `char`, not by byte, so multi-byte UTF-8
/// characters survive intact both as plain text and as the escaped character.
#[inline]
fn process_arg(s: &str) -> String {
    // Fast path: nothing to unescape.
    if !s.contains('\\') {
        return s.to_string();
    }

    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }
        // `ch` is a backslash: the next character decides the output.
        match chars.next() {
            Some('n') => result.push('\n'),
            Some('t') => result.push('\t'),
            Some('r') => result.push('\r'),
            Some(other) => result.push(other),
            // Backslash at the very end of the input: keep it literally.
            None => result.push('\\'),
        }
    }
    result
}
/// Splits a sed-style node into `(pattern, replacement, flags)`.
///
/// The first two children are required (the function panics if either is
/// missing); the third is optional and defaults to an empty flags string.
/// All three are copied verbatim.
///
/// # Errors
///
/// Returns `Err` when the pattern text is empty.
fn parse_sed_string(pair: pest::iterators::Pair<Rule>) -> Result<(String, String, String), String> {
    let mut fields = pair.into_inner();
    let pattern = fields.next().unwrap().as_str();
    let replacement = fields.next().unwrap().as_str();
    let flags = fields.next().map(|node| node.as_str()).unwrap_or("");

    if pattern.is_empty() {
        return Err("Empty pattern in sed string".to_string());
    }

    Ok((
        pattern.to_owned(),
        replacement.to_owned(),
        flags.to_owned(),
    ))
}
/// Builds a [`RangeSpec`] from a range-specification node.
///
/// The node's first child selects the form: two-sided ranges (inclusive or
/// exclusive), open-ended `from` / `to` ranges, the full range, or a single
/// index. A range bound that is absent or fails to parse is stored as `None`.
///
/// # Errors
///
/// Returns `Err` when an index is not a valid number or when the child rule
/// is not a known range form.
fn parse_range_spec(pair: pest::iterators::Pair<Rule>) -> Result<RangeSpec, String> {
    let spec = pair.into_inner().next().unwrap();
    let rule = spec.as_rule();

    match rule {
        Rule::range_inclusive | Rule::range_exclusive => {
            let inclusive = matches!(rule, Rule::range_inclusive);
            let mut bounds = spec.into_inner();
            let start = bounds.next().and_then(|b| b.as_str().parse().ok());
            let end = bounds.next().and_then(|b| b.as_str().parse().ok());
            Ok(RangeSpec::Range(start, end, inclusive))
        }
        Rule::range_from => {
            let bound = spec.into_inner().next().unwrap();
            Ok(RangeSpec::Range(bound.as_str().parse().ok(), None, false))
        }
        Rule::range_to | Rule::range_to_inclusive => {
            let inclusive = matches!(rule, Rule::range_to_inclusive);
            let bound = spec.into_inner().next().unwrap();
            Ok(RangeSpec::Range(None, bound.as_str().parse().ok(), inclusive))
        }
        Rule::range_full => Ok(RangeSpec::Range(None, None, false)),
        Rule::index => {
            let text = spec.into_inner().next().unwrap().as_str();
            let idx = text
                .parse()
                .map_err(|_| format!("Invalid index: {text}"))?;
            Ok(RangeSpec::Index(idx))
        }
        other => Err(format!("Unknown range spec: {other:?}")),
    }
}