// string_pipeline 0.14.0
//
// A flexible, template-driven string transformation pipeline for Rust.
// Documentation
// Top-level rule: a template is wrapped in braces and holds an optional
// debug flag followed by an optional operation list. Both parts are optional,
// so the grammar accepts `{}` and `{!}` as well.
template = { "{" ~ debug_flag? ~ operation_list? ~ "}" }

// A single `!` placed directly after the opening brace. Atomic rule.
debug_flag = @{ "!" }

// One or more operations separated by `|`.
operation_list = { operation ~ ("|" ~ operation)* }

// A single pipeline step. This is an ordered choice: alternatives are tried
// top to bottom and the first one that matches wins.
//
// The shorthand forms (a bare range or a bare index) are tried before the
// named operations. `shorthand_range` comes before `shorthand_index` so that
// an input such as `1..3` is not cut short after the leading number.
//
// `filter` is listed before `filter_not`; this is safe because `filter`
// requires a `:` immediately after the keyword, so `filter_not:...` fails the
// `filter` alternative and falls through to `filter_not`.
operation = {
    shorthand_range
  | shorthand_index
  | split
  | upper
  | lower
  | trim
  | append
  | prepend
  | surround
  | quote
  | join
  | substring
  | replace
  | map
  | filter
  | filter_not
  | slice
  | sort
  | reverse
  | unique
  | regex_extract
  | strip_ansi
  | pad
}

// Shorthand operation consisting of a bare (possibly negative) number.
shorthand_index = { number }

// Shorthand operation consisting of a bare range expression.
// Ordered choice: the forms that start with `..` and require an end bound are
// tried first, then the forms with optional bounds.
// NOTE(review): `range_exclusive` has optional bounds on both sides, so it
// already matches `N..` and `..`; `range_from` and `range_full` below it can
// therefore never be selected here. Confirm the parser does not rely on them.
shorthand_range = {
    range_to_inclusive
  | range_to
  | range_inclusive
  | range_exclusive
  | range_from
  | range_full
}

// Main operations - using specific arg types where needed.
// Each rule is the operation keyword, optionally followed by `:`-separated
// arguments. Rules marked `@` (atomic) take no arguments.

// regex_extract:<regex>[:<number>]
regex_extract = { "regex_extract" ~ ":" ~ regex_arg ~ (":" ~ number)? }
// filter_not:<regex>
filter_not    = { "filter_not" ~ ":" ~ regex_arg }
// filter:<regex>
filter        = { "filter" ~ ":" ~ regex_arg }
strip_ansi    = @{ "strip_ansi" }
// map:{<op>|<op>|...}
map           = { "map" ~ ":" ~ map_operation }
// split:<separator>:[<range>]
// NOTE(review): the second `:` is mandatory here and only the range after it
// is optional, whereas `map_split` makes `:<range>` optional as a unit.
// Confirm the difference is intentional.
split         = { "split" ~ ":" ~ split_arg ~ ":" ~ range_spec? }
// substring:<range>
substring     = { "substring" ~ ":" ~ range_spec }
// replace:s/<pattern>/<replacement>/[<flags>]
replace       = { "replace" ~ ":" ~ sed_string }
append        = { "append" ~ ":" ~ simple_arg }
prepend       = { "prepend" ~ ":" ~ simple_arg }
surround      = { "surround" ~ ":" ~ simple_arg }
quote         = { "quote" ~ ":" ~ simple_arg }
upper         = @{ "upper" }
lower         = @{ "lower" }
// trim[:<chars>][:<direction>]
// NOTE(review): `simple_arg` also accepts the words left/right/both, so for
// `trim:left` the first optional group matches and `left` is captured as
// `simple_arg`, not as `direction`. Presumably the parser handles this case;
// verify against the code that consumes the `trim` pair.
trim          = { "trim" ~ (":" ~ simple_arg)? ~ (":" ~ direction)? }
join          = { "join" ~ ":" ~ simple_arg }
// slice:<range>
slice         = { "slice" ~ ":" ~ range_spec }
// sort[:asc|desc]
sort          = { "sort" ~ (":" ~ sort_direction)? }
reverse       = @{ "reverse" }
unique        = @{ "unique" }
// pad:<number>[:<pad_char>][:<direction>]
// NOTE(review): as with `trim`, `pad_char` accepts the direction words, so in
// `pad:5:left` the word `left` is captured as `pad_char`. Verify against the
// code that consumes the `pad` pair.
pad           = { "pad" ~ ":" ~ number ~ (":" ~ pad_char)? ~ (":" ~ direction)? }

// Direction specifiers (all atomic).
// `direction` is referenced by `trim` and `pad`; `sort_direction` by `sort`
// and `map_sort`.
direction      = @{ "left" | "right" | "both" }
sort_direction = @{ "asc" | "desc" }
// One or more characters drawn from the same character class as `simple_arg`
// (escaped characters, or anything except `:`, `|`, `{`, `}` and `\`).
pad_char       = @{ simple_arg_content+ }

// Map operation: a brace-wrapped, `|`-separated list of inner operations.
// Unlike `template`, the inner list is mandatory, so `map:{}` does not match.
map_operation       = { "{" ~ map_operation_list ~ "}" }
map_operation_list  = { map_inner_operation ~ ("|" ~ map_inner_operation)* }

// Operations permitted inside `map:{...}`. Ordered choice.
// The first group reuses the top-level rules unchanged; the `map_*` group
// uses dedicated rules defined separately. `map` itself and the shorthand
// index/range forms are not listed, so they are rejected inside a map.
map_inner_operation = {
    strip_ansi
  | substring
  | replace
  | append
  | prepend
  | surround
  | quote
  | upper
  | lower
  | trim
  | pad
  | reverse
  | map_split
  | map_join
  | map_slice
  | map_sort
  | map_unique
  | map_filter
  | map_filter_not
  | map_regex_extract
}

// Map-specific operations that need special handling.
// They use the same keywords as their top-level counterparts but are separate
// rules, so they produce distinct rule names in the parse result.

// split:<separator>[:<range>] - here the `:<range>` suffix is optional as a
// whole (compare the top-level `split`).
map_split      = { "split" ~ ":" ~ split_arg ~ (":" ~ range_spec)? }
map_join       = { "join" ~ ":" ~ simple_arg }
map_slice      = { "slice" ~ ":" ~ range_spec }
map_sort       = { "sort" ~ (":" ~ sort_direction)? }
map_unique     = @{ "unique" }
// The filter rules use `map_regex_arg`, which stops at the `}` that closes
// the enclosing map.
map_filter     = { "filter" ~ ":" ~ map_regex_arg }
map_filter_not = { "filter_not" ~ ":" ~ map_regex_arg }

// Map-specific regex extract: regex_extract:<regex>[:<number>]
map_regex_extract = { "regex_extract" ~ ":" ~ map_regex_arg ~ (":" ~ number)? }

// Simplified argument handling - three types to handle specific cases
// (simple, split and regex arguments).
//
// simple_arg: zero or more characters, each either a backslash escape or any
// character other than `:`, `|`, `}`, `{` and `\`. It may match the empty
// string. The rule is atomic, so the inner rules produce no separate tokens.
simple_arg         = @{ simple_arg_content* }
simple_arg_content =  { escaped_char | simple_normal_char }
simple_normal_char =  { !(":" | "|" | "}" | "{" | "\\") ~ ANY }

// Split args - need to handle pipes that aren't operations.
//
// split_arg: zero or more characters (atomic, may be empty). Backslash
// escapes are tried first and always consume the following character.
split_arg          = @{ (split_escaped_char | split_content)* }
// A plain character is accepted unless the input at this point is:
//   - `:` followed by a number or a range operator (start of the range arg),
//   - `|` followed by an operation keyword (start of the next operation),
//   - `}` immediately followed by end of input (end of the template).
// NOTE(review): a `}` that is not the last character of the input is
// consumed as part of the separator; confirm this is the intended behavior
// when the rule is used via `map_split` inside `map:{...}`.
split_content      =  { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
// Backslash followed by any single character.
split_escaped_char =  { "\\" ~ ANY }

// Regex args - need to handle pipes and braces in regex patterns.
//
// regex_arg: zero or more characters (atomic, may be empty). Backslash
// escapes are tried first and always consume the following character.
regex_arg          = @{ (regex_escaped_char | regex_content)* }
// Same stop conditions as `split_content`: the argument ends before
// `:` + number/range operator, before `|` + operation keyword, and before a
// `}` that is the last character of the input. Any other `|`, `{` or `}` is
// taken as part of the pattern.
regex_content      =  { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
// Backslash followed by any single character.
regex_escaped_char =  { "\\" ~ ANY }

// Map regex args - handle braces in regex patterns.
//
// map_regex_arg: zero or more items (atomic, may be empty). Each item is, in
// order of preference, a backslash escape, a balanced `{...}` group, or a
// plain character.
map_regex_arg          = @{ (map_regex_escaped_char | map_regex_brace | map_regex_content)* }
// `{`, then any characters other than `}`, then `}` (for example a regex
// quantifier such as `{2,3}`). Nested braces are not supported by this rule.
map_regex_brace        =  { "{" ~ (!"}" ~ ANY)* ~ "}" }
// A plain character is accepted unless the input at this point is:
//   - `:` followed by a number,
//   - `|` followed by an operation keyword,
//   - `{` (left to `map_regex_brace`),
//   - `}` followed by `|`, `}` or end of input (the `}` closing the map).
map_regex_content      =  { !(":" ~ number) ~ !("|" ~ operation_keyword) ~ !("{" | ("}" ~ ("|" | "}" | EOI))) ~ ANY }
// Backslash followed by any single character.
map_regex_escaped_char =  { "\\" ~ ANY }

// Common escaped character handling: a backslash followed by any single
// character. Referenced by `simple_arg_content`.
escaped_char = { "\\" ~ ANY }

// Operation keywords for lookahead (simplified list).
//
// Silent rule (`_`): it produces no token of its own. Within this grammar it
// is only referenced inside negative lookaheads of the form
// `!("|" ~ operation_keyword)`, which decide where an argument ends.
//
// Matching is by prefix only; no word boundary is checked after the keyword.
//
// This is an ordered choice, so when one keyword is a prefix of another the
// longer one must come first or it can never be selected. `"filter_not"` is
// therefore listed before `"filter"`.
operation_keyword = _{
    "split"
  | "upper"
  | "lower"
  | "trim"
  | "append"
  | "prepend"
  | "surround"
  | "quote"
  | "join"
  | "substring"
  | "replace"
  | "map"
  | "filter_not"
  | "filter"
  | "slice"
  | "sort"
  | "reverse"
  | "unique"
  | "regex_extract"
  | "strip_ansi"
  | "pad"
}

// Range parts for lookahead.
// Silent rule matching a range operator. `"..="` is listed before `".."`
// because this is an ordered choice and `".."` is a prefix of `"..="`; with
// the shorter literal first the longer alternative could never be selected.
range_part = _{ "..=" | ".." }

// Sed strings: s/<pattern>/<replacement>/[<flags>]
// The delimiter is fixed to `/`; all three slashes are required.
sed_string       =  { "s/" ~ sed_pattern ~ "/" ~ sed_replacement ~ "/" ~ sed_flags? }
// Pattern and replacement share the same content rule and are atomic.
// Both may be empty as far as the grammar is concerned.
sed_pattern      = @{ sed_content }
sed_replacement  = @{ sed_content }
// Zero or more characters, each a backslash escape or a plain character.
sed_content      =  { (sed_escaped_char | sed_normal_char)* }
// Any character except `/` and `\`; a literal `/` must be written as `\/`.
sed_normal_char  =  { !("/" | "\\") ~ ANY }
// Backslash followed by any single character.
sed_escaped_char =  { "\\" ~ ANY }
// Zero or more ASCII letters. The grammar does not restrict which letters
// are allowed.
sed_flags        = @{ ASCII_ALPHA* }

// Range specifications: a range expression or a single index.
// Ordered choice. `index` comes last so that the leading number of a range
// such as `1..3` is not consumed as a standalone index.
// NOTE(review): `range_exclusive` has optional bounds on both sides, so it
// already matches `N..` and `..`; `range_from` and `range_full` below it can
// therefore never be selected here. Confirm the parser does not rely on them.
range_spec = {
    range_to_inclusive
  | range_to
  | range_inclusive
  | range_exclusive
  | range_from
  | range_full
  | index
}

// Individual range forms. `number` may be negative (see `number`).
// [start]..=[end] - both bounds optional in the grammar.
range_inclusive    = { number? ~ "..=" ~ number? }
// [start]..[end] - both bounds optional in the grammar.
range_exclusive    = { number? ~ ".." ~ number? }
// start..
range_from         = { number ~ ".." }
// ..end
range_to           = { ".." ~ number }
// ..=end
range_to_inclusive = { "..=" ~ number }
// ..
range_full         = { ".." }
// A single position.
index              = { number }

// An integer literal: optional leading `-` followed by one or more ASCII
// digits. Atomic rule.
number = @{ "-"? ~ ASCII_DIGIT+ }