use std::collections::{HashMap, HashSet, hash_map::DefaultHasher};
use std::fmt::Display;
use std::hash::{Hash, Hasher};
use std::ops::Range;
use crate::pipeline::get_cached_split;
use crate::pipeline::{DebugTracer, RangeSpec, StringOp, apply_ops_internal, apply_range, parser}; use memchr::memchr_iter;
/// A parsed template: literal text interleaved with operation sections.
///
/// Built by [`Template::parse`] or [`Template::parse_with_debug`]. The
/// execution plan of every section is compiled once, at construction time.
#[derive(Debug, Clone)]
pub struct Template {
/// Template source text exactly as it was handed to the parser.
raw: String,
/// Parsed sections in source order.
sections: Vec<TemplateSection>,
/// One plan per entry of `sections`, in the same order.
compiled_sections: Vec<CompiledSectionPlan>,
/// Whether rendering creates a `DebugTracer` and emits trace output.
debug: bool,
}
/// Pre-computed rendering plan for a single section of a template.
///
/// Plans are stored in parallel with the parsed sections; a literal section
/// always pairs with `Literal` and a template section with `Template`.
#[derive(Debug, Clone)]
enum CompiledSectionPlan {
/// The section is literal text and is copied to the output unchanged.
Literal,
/// The section runs operations against the input.
Template {
/// How the section is executed and whether its output is cached.
exec: TemplateExecutionPlan,
/// Hash of the section's operation list, used as part of the cache key.
cache_key: u64,
},
}
/// Execution strategy plus caching decision for one template section.
#[derive(Debug, Clone)]
struct TemplateExecutionPlan {
/// Which execution path (fast path or generic pipeline) the section uses.
kind: TemplateExecutionKind,
/// Whether results of this section are memoized during a render call.
cache_policy: CachePolicy,
}
/// Execution path selected for a template section when the template is compiled.
#[derive(Debug, Clone)]
enum TemplateExecutionKind {
/// The operation list is empty: the input is returned as-is.
Passthrough,
/// The section is a single split that selects one part by index.
SplitIndex { sep: String, idx: isize },
/// The section is a full-range split immediately followed by a join.
SplitJoinRewrite { split_sep: String, join_sep: String },
/// Any other operation list: executed through `apply_ops_internal`.
Generic,
}
/// Whether the output of a template section is memoized.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CachePolicy {
/// Always execute the section; assigned when its cache key is unique in the template.
Never,
/// Memoize in the cache created for the current render call; assigned when
/// the same cache key occurs more than once in the template.
PerCall,
}
/// One parsed piece of a template, in source order.
#[derive(Debug, Clone)]
pub enum TemplateSection {
    /// Text that is copied to the output verbatim.
    Literal(String),
    /// An operation list applied to the input; `cache_key` is the hash of `ops`.
    Template { ops: Vec<StringOp>, cache_key: u64 },
}

impl TemplateSection {
    /// Wraps an operation list in a `Template` section, deriving its cache
    /// key from the hash of the operations.
    pub(crate) fn from_ops(ops: Vec<StringOp>) -> Self {
        // Fields are evaluated in written order, so the hash borrows `ops`
        // before the vector is moved into the variant.
        Self::Template {
            cache_key: Template::hash_ops(&ops),
            ops,
        }
    }
}
/// Discriminates the two kinds of section reported by `get_section_info`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionType {
/// Verbatim text.
Literal,
/// A section that applies operations to the input.
Template,
}
/// Owned description of one section, as produced by `Template::get_section_info`.
#[derive(Debug, Clone)]
pub struct SectionInfo {
/// Kind of the described section.
pub section_type: SectionType,
/// Zero-based index of the section among all sections.
pub overall_position: usize,
/// Zero-based index among template sections only; `None` for literals.
pub template_position: Option<usize>,
/// The literal text; `None` for template sections.
pub content: Option<String>,
/// A copy of the operation list; `None` for literal sections.
pub operations: Option<Vec<StringOp>>,
}
/// Location of one template section's output inside a rendered string.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct TemplateOutput {
    /// Zero-based index of the section among template sections only.
    pub template_position: usize,
    /// Zero-based index of the section among all sections.
    pub overall_position: usize,
    /// Byte range of this section's output within the rendered string.
    pub rendered_range: Range<usize>,
}

impl TemplateOutput {
    /// Index of the section among template sections only.
    pub fn template_position(&self) -> usize {
        self.template_position
    }

    /// Index of the section among all sections.
    pub fn overall_position(&self) -> usize {
        self.overall_position
    }

    /// Byte range of the output within the rendered string.
    pub fn rendered_range(&self) -> Range<usize> {
        self.rendered_range.start..self.rendered_range.end
    }

    /// Borrows this section's output out of `rendered`.
    ///
    /// # Panics
    ///
    /// Panics if the stored range is out of bounds for `rendered` or does not
    /// fall on character boundaries, e.g. when `rendered` is not the string
    /// this output was recorded against.
    pub fn as_str<'a>(&self, rendered: &'a str) -> &'a str {
        let span = self.rendered_range.start..self.rendered_range.end;
        &rendered[span]
    }
}
/// A rendered string together with the location of every template
/// section's output inside it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct RichFormatResult {
    /// The complete rendered output.
    pub rendered: String,
    /// One entry per template section, in the order the sections were rendered.
    pub template_outputs: Vec<TemplateOutput>,
}

impl RichFormatResult {
    /// The complete rendered output.
    pub fn rendered(&self) -> &str {
        self.rendered.as_str()
    }

    /// Output locations of all template sections.
    pub fn template_outputs(&self) -> &[TemplateOutput] {
        self.template_outputs.as_slice()
    }

    /// Text produced by the template section stored at `index`, or `None`
    /// when `index` is out of range.
    pub fn template_output(&self, index: usize) -> Option<&str> {
        let output = self.template_outputs.get(index)?;
        Some(output.as_str(&self.rendered))
    }
}
/// Memo table of rendered section output for a single render call.
struct TemplateCache {
    /// Rendered output keyed by (input hash, section key).
    operations: HashMap<CacheKey, String>,
}

impl TemplateCache {
    /// Creates an empty cache.
    fn new() -> Self {
        let operations = HashMap::default();
        Self { operations }
    }
}
/// Mutable state handed to `Template::execute_template_section` for one section run.
struct ExecutionContext<'a> {
/// Hash of the current input; filled in on first use when still `None`.
input_hash: &'a mut Option<u64>,
/// Cache consulted and updated for sections whose policy is `PerCall`.
cache: &'a mut TemplateCache,
/// Tracer that receives cache/fast-path notices, when debugging is active.
dbg: Option<&'a DebugTracer>,
}
/// Key of one `TemplateCache` entry.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
struct CacheKey {
/// Hash of the input string (see `Template::hash_input`).
input_hash: u64,
/// Hash of the section's operation list (see `Template::hash_ops`).
section_key: u64,
}
/// Accumulates rendered text and, optionally, the location of each
/// template section's output within it.
struct RenderBuffer {
    /// Output produced so far.
    rendered: String,
    /// Per-section output ranges; `None` when rich output was not requested.
    template_outputs: Option<Vec<TemplateOutput>>,
}

impl RenderBuffer {
    /// Creates a buffer with `rendered_capacity` bytes reserved for text.
    /// Range tracking is enabled only when `rich_capacity` is `Some`.
    fn new(rendered_capacity: usize, rich_capacity: Option<usize>) -> Self {
        let template_outputs = match rich_capacity {
            Some(capacity) => Some(Vec::with_capacity(capacity)),
            None => None,
        };
        Self {
            rendered: String::with_capacity(rendered_capacity),
            template_outputs,
        }
    }

    /// Appends literal text; no range is recorded for literals.
    fn push_literal(&mut self, text: &str) {
        self.rendered.push_str(text);
    }

    /// Appends the output of a template section and, when range tracking is
    /// enabled, records the byte range it occupies.
    fn push_template_output(
        &mut self,
        template_position: usize,
        overall_position: usize,
        output: String,
    ) {
        let range_start = self.rendered.len();
        self.rendered.push_str(&output);
        let range_end = self.rendered.len();

        if let Some(recorded) = self.template_outputs.as_mut() {
            recorded.push(TemplateOutput {
                template_position,
                overall_position,
                rendered_range: range_start..range_end,
            });
        }
    }

    /// Consumes the buffer, keeping only the rendered text.
    fn into_rendered(self) -> String {
        self.rendered
    }

    /// Consumes the buffer into a rich result; the range list is empty when
    /// tracking was disabled.
    fn into_rich(self) -> RichFormatResult {
        let Self {
            rendered,
            template_outputs,
        } = self;
        RichFormatResult {
            rendered,
            template_outputs: template_outputs.unwrap_or_default(),
        }
    }
}
impl Template {
/// Builds a template from already-parsed sections, compiling the execution
/// plan of every section up front.
///
/// `raw` is the original template text and `debug` enables trace output
/// during rendering.
fn new(raw: String, sections: Vec<TemplateSection>, debug: bool) -> Self {
    // The plans are derived from a borrow of `sections` before the vector is
    // moved into the struct.
    let compiled_sections = Self::compile_sections(&sections);
    Self {
        raw,
        sections,
        compiled_sections,
        debug,
    }
}
/// Parses `template` into a [`Template`].
///
/// A template that is exactly one `{...}` block is parsed as a single
/// template section; anything else goes through the multi-section parser.
///
/// NOTE(review): on the multi-section path the debug flag reported by the
/// parser is discarded and debugging starts disabled, while the single-block
/// path keeps the parsed flag. Confirm this asymmetry is intended.
///
/// # Errors
///
/// Returns the parser's error message when the template is invalid.
pub fn parse(template: &str) -> Result<Self, String> {
    match Self::try_single_block(template)? {
        Some(single) => Ok(single),
        None => {
            let (sections, _parsed_debug) = parser::parse_template_sections(template)?;
            Ok(Self::new(template.to_owned(), sections, false))
        }
    }
}
/// Parses `template`, optionally overriding the debug flag.
///
/// With `debug == Some(flag)` the resulting template uses `flag`; with `None`
/// it keeps whatever debug flag the parser reported for the template text.
///
/// # Errors
///
/// Returns the parser's error message when the template is invalid.
pub fn parse_with_debug(template: &str, debug: Option<bool>) -> Result<Self, String> {
    match Self::try_single_block(template)? {
        Some(mut single) => {
            single.debug = debug.unwrap_or(single.debug);
            Ok(single)
        }
        None => {
            let (sections, parsed_debug) = parser::parse_template_sections(template)?;
            let effective_debug = debug.unwrap_or(parsed_debug);
            Ok(Self::new(template.to_owned(), sections, effective_debug))
        }
    }
}
pub fn format(&self, input: &str) -> Result<String, String> {
self.render_single_input(input, false)
.map(RenderBuffer::into_rendered)
}
pub fn format_rich(&self, input: &str) -> Result<RichFormatResult, String> {
self.render_single_input(input, true)
.map(RenderBuffer::into_rich)
}
/// Returns the template source text this instance was parsed from.
pub fn template_string(&self) -> &str {
    self.raw.as_str()
}
/// Returns the total number of sections, counting literal and template sections alike.
pub fn section_count(&self) -> usize {
self.sections.len()
}
/// Returns how many sections are template (operation) sections.
pub fn template_section_count(&self) -> usize {
    let mut count = 0usize;
    for section in &self.sections {
        match section {
            TemplateSection::Template { .. } => count += 1,
            TemplateSection::Literal(_) => {}
        }
    }
    count
}
/// Returns whether debug tracing is enabled for this template.
pub fn is_debug(&self) -> bool {
self.debug
}
pub fn with_debug(mut self, debug: bool) -> Self {
self.debug = debug;
self
}
/// Enables or disables debug tracing in place.
pub fn set_debug(&mut self, debug: bool) {
self.debug = debug;
}
pub fn format_with_inputs(
&self,
inputs: &[&[&str]],
separators: &[&str],
) -> Result<String, String> {
self.render_structured_inputs(inputs, separators, false)
.map(RenderBuffer::into_rendered)
}
pub fn format_with_inputs_rich(
&self,
inputs: &[&[&str]],
separators: &[&str],
) -> Result<RichFormatResult, String> {
self.render_structured_inputs(inputs, separators, true)
.map(RenderBuffer::into_rich)
}
/// Returns every template section's operation list, paired with its
/// zero-based position among template sections (literals are skipped).
pub fn get_template_sections(&self) -> Vec<(usize, &Vec<StringOp>)> {
    self.sections
        .iter()
        .filter_map(|section| match section {
            TemplateSection::Template { ops, .. } => Some(ops),
            TemplateSection::Literal(_) => None,
        })
        .enumerate()
        .collect()
}
/// Describes every section in source order.
///
/// Literal sections carry their text in `content`; template sections carry
/// a copy of their operations plus their position among template sections.
pub fn get_section_info(&self) -> Vec<SectionInfo> {
    let mut next_template_position = 0usize;
    self.sections
        .iter()
        .enumerate()
        .map(|(overall_position, section)| match section {
            TemplateSection::Literal(text) => SectionInfo {
                section_type: SectionType::Literal,
                overall_position,
                template_position: None,
                content: Some(text.clone()),
                operations: None,
            },
            TemplateSection::Template { ops, .. } => {
                let template_position = next_template_position;
                next_template_position += 1;
                SectionInfo {
                    section_type: SectionType::Template,
                    overall_position,
                    template_position: Some(template_position),
                    content: None,
                    operations: Some(ops.clone()),
                }
            }
        })
        .collect()
}
/// Renders all sections against one shared `input`.
///
/// A fresh cache is created for the call, and the input hash is computed
/// lazily, only if a cached section needs it. When debugging is enabled
/// the whole render is wrapped in a "MULTI-TEMPLATE" trace session.
fn render_single_input(&self, input: &str, collect_rich: bool) -> Result<RenderBuffer, String> {
    use std::time::Instant;

    let mut cache = TemplateCache::new();
    let mut input_hash: Option<u64> = None;

    let started_at = if self.debug { Some(Instant::now()) } else { None };
    let tracer = if self.debug {
        Some(DebugTracer::new(true))
    } else {
        None
    };

    if let Some(active) = &tracer {
        let total = self.sections.len();
        let templates = self.template_section_count();
        let info = format!(
            "{} sections (literal: {}, template: {})",
            total,
            total - templates,
            templates
        );
        active.session_start("MULTI-TEMPLATE", &self.raw, input, Some(&info));
    }

    let capacity = self.estimate_output_capacity(input);
    let buffer = self.render_sections(
        capacity,
        collect_rich,
        tracer.as_ref(),
        |_, ops, exec, cache_key, dbg| {
            let ctx = ExecutionContext {
                input_hash: &mut input_hash,
                cache: &mut cache,
                dbg,
            };
            self.execute_template_section(input, ops, exec, cache_key, ctx)
        },
    )?;

    if let Some((active, started)) = tracer.as_ref().zip(started_at) {
        active.session_end("MULTI-TEMPLATE", &buffer.rendered, started.elapsed());
    }
    Ok(buffer)
}
/// Renders all sections, giving each template section its own input list
/// and join separator.
///
/// Both lists are normalised to exactly one entry per template section:
/// a missing input list becomes empty and a missing separator becomes " ".
/// No tracer is passed to the section renderer on this path.
fn render_structured_inputs(
    &self,
    inputs: &[&[&str]],
    separators: &[&str],
    collect_rich: bool,
) -> Result<RenderBuffer, String> {
    let slots = self.template_section_count();
    let mut per_section_inputs: Vec<&[&str]> = Vec::with_capacity(slots);
    let mut per_section_separators: Vec<&str> = Vec::with_capacity(slots);
    for slot in 0..slots {
        per_section_inputs.push(inputs.get(slot).copied().unwrap_or(&[]));
        per_section_separators.push(separators.get(slot).copied().unwrap_or(" "));
    }

    let mut cache = TemplateCache::new();
    let capacity = self.literal_output_capacity();
    self.render_sections(
        capacity,
        collect_rich,
        None,
        |template_position, ops, exec, cache_key, _| {
            self.execute_structured_template_section(
                per_section_inputs[template_position],
                per_section_separators[template_position],
                ops,
                exec,
                cache_key,
                &mut cache,
            )
        },
    )
}
/// Walks the sections in order, copying literals and delegating template
/// sections to `render_template_section`.
///
/// The callback receives the section's position among template sections,
/// its operations, its compiled plan, its cache key and the tracer. When a
/// tracer is present, each section is announced (1-based) and a separator is
/// emitted after every literal that is not the last section.
///
/// # Errors
///
/// Stops at, and returns, the first error produced by the callback.
fn render_sections<F>(
    &self,
    rendered_capacity: usize,
    collect_rich: bool,
    tracer: Option<&DebugTracer>,
    mut render_template_section: F,
) -> Result<RenderBuffer, String>
where
    F: FnMut(
        usize,
        &[StringOp],
        &TemplateExecutionPlan,
        u64,
        Option<&DebugTracer>,
    ) -> Result<String, String>,
{
    let total_sections = self.sections.len();
    let rich_capacity = if collect_rich {
        Some(self.template_section_count())
    } else {
        None
    };
    let mut buffer = RenderBuffer::new(rendered_capacity, rich_capacity);
    let mut template_position = 0usize;

    let paired = self.sections.iter().zip(&self.compiled_sections);
    for (overall_position, pair) in paired.enumerate() {
        let display_index = overall_position + 1;
        match pair {
            (TemplateSection::Literal(text), CompiledSectionPlan::Literal) => {
                if let Some(active) = tracer {
                    let preview = Self::literal_preview(text);
                    active.section(display_index, total_sections, "literal", &preview);
                }
                buffer.push_literal(text);
                if let Some(active) = tracer {
                    if display_index < total_sections {
                        active.separator();
                    }
                }
            }
            (
                TemplateSection::Template { ops, .. },
                CompiledSectionPlan::Template { exec, cache_key },
            ) => {
                if let Some(active) = tracer {
                    let summary = Self::format_operations_summary(ops);
                    active.section(display_index, total_sections, "template", &summary);
                }
                let output =
                    render_template_section(template_position, ops, exec, *cache_key, tracer)?;
                buffer.push_template_output(template_position, overall_position, output);
                template_position += 1;
            }
            // `compile_sections` emits exactly one plan of the matching kind
            // per section, so a mismatch is a construction bug.
            _ => unreachable!("compiled section plan must match template sections"),
        }
    }
    Ok(buffer)
}
/// Runs one template section over each of its inputs and joins the results
/// with `separator`.
///
/// An empty input list yields an empty string; a single input yields its
/// result unjoined. Each input's hash is computed before the section runs.
///
/// # Errors
///
/// Stops at, and returns, the first error produced for any input.
fn execute_structured_template_section(
    &self,
    section_inputs: &[&str],
    separator: &str,
    ops: &[StringOp],
    exec: &TemplateExecutionPlan,
    cache_key: u64,
    cache: &mut TemplateCache,
) -> Result<String, String> {
    let mut run_one = |input: &str| -> Result<String, String> {
        let mut input_hash = Some(Self::hash_input(input));
        self.execute_template_section(
            input,
            ops,
            exec,
            cache_key,
            ExecutionContext {
                input_hash: &mut input_hash,
                cache: &mut *cache,
                dbg: None,
            },
        )
    };

    match section_inputs {
        [] => Ok(String::new()),
        [only] => run_one(*only),
        many => {
            let mut rendered = Vec::with_capacity(many.len());
            for input in many {
                rendered.push(run_one(*input)?);
            }
            Ok(rendered.join(separator))
        }
    }
}
fn execute_template_section(
&self,
input: &str,
ops: &[StringOp],
exec: &TemplateExecutionPlan,
section_key: u64,
ctx: ExecutionContext<'_>,
) -> Result<String, String> {
match exec.cache_policy {
CachePolicy::Never => {
if let Some(t) = ctx.dbg {
t.cache_operation("DIRECT EXEC", "cache disabled for unique section");
}
self.execute_template_section_inner(input, ops, &exec.kind, ctx.dbg)
}
CachePolicy::PerCall => {
let key = CacheKey {
input_hash: *ctx
.input_hash
.get_or_insert_with(|| Self::hash_input(input)),
section_key,
};
if let Some(cached) = ctx.cache.operations.get(&key) {
if let Some(t) = ctx.dbg {
t.cache_operation("CACHE HIT", "re-using formatted section");
}
return Ok(cached.clone());
}
if let Some(t) = ctx.dbg {
t.cache_operation("CACHE MISS", "computing section");
}
let out = self.execute_template_section_inner(input, ops, &exec.kind, ctx.dbg)?;
ctx.cache.operations.insert(key, out.clone());
Ok(out)
}
}
}
/// Builds a short, human-readable preview of a literal section for trace output.
///
/// * Blank text of at most two bytes is shown as `whitespace`.
/// * Text of at most 20 characters is shown in full, in single quotes.
/// * Longer text is cut to its first 15 characters, followed by `...` and
///   the total character count.
///
/// Lengths are measured in characters, not bytes, so the cut always falls
/// on a character boundary and never panics on multi-byte UTF-8 text.
fn literal_preview(text: &str) -> String {
    const INLINE_LIMIT: usize = 20;
    const PREVIEW_CHARS: usize = 15;

    if text.trim().is_empty() && text.len() <= 2 {
        return "whitespace".to_string();
    }

    let char_count = text.chars().count();
    if char_count <= INLINE_LIMIT {
        return format!("'{text}'");
    }

    // Byte offset where the 16th character starts, i.e. the end of the first
    // 15 characters. It exists because the text has more than 20 characters.
    let cut = text
        .char_indices()
        .nth(PREVIEW_CHARS)
        .map_or(text.len(), |(offset, _)| offset);
    format!("'{}...' ({} chars)", &text[..cut], char_count)
}
/// Executes a section along the path chosen when the template was compiled.
///
/// The fast paths report themselves to `dbg` when it is present. The
/// generic path runs the full operation pipeline and, when the template's
/// debug flag is set, hands that pipeline a tracer of its own.
fn execute_template_section_inner(
    &self,
    input: &str,
    ops: &[StringOp],
    kind: &TemplateExecutionKind,
    dbg: Option<&DebugTracer>,
) -> Result<String, String> {
    match kind {
        TemplateExecutionKind::Generic => {
            let nested_tracer = self.debug.then(|| DebugTracer::new(true));
            apply_ops_internal(input, ops, self.debug, nested_tracer)
        }
        TemplateExecutionKind::Passthrough => {
            if let Some(tracer) = dbg {
                tracer.cache_operation("FAST PASSTHROUGH", "empty template section");
            }
            Ok(input.to_owned())
        }
        TemplateExecutionKind::SplitIndex { sep, idx } => {
            if let Some(tracer) = dbg {
                let detail = format!("by '{sep}'");
                tracer.cache_operation("FAST SPLIT", &detail);
            }
            let selected = self.fast_split_index(input, sep, *idx);
            Ok(selected)
        }
        TemplateExecutionKind::SplitJoinRewrite {
            split_sep,
            join_sep,
        } => {
            if let Some(tracer) = dbg {
                tracer.cache_operation("FAST SPLIT+JOIN", "direct separator rewrite");
            }
            let rewritten = self.fast_split_join(input, split_sep, join_sep);
            Ok(rewritten)
        }
    }
}
/// Produces one execution plan per section, in section order.
///
/// A first pass finds cache keys that occur more than once; only template
/// sections with such a repeated key get `CachePolicy::PerCall`, all others
/// get `CachePolicy::Never`.
fn compile_sections(sections: &[TemplateSection]) -> Vec<CompiledSectionPlan> {
    let mut seen: HashSet<u64> = HashSet::with_capacity(sections.len());
    let mut repeated: HashSet<u64> = HashSet::with_capacity(sections.len());
    for section in sections {
        if let TemplateSection::Template { cache_key, .. } = section {
            // `insert` returns false when the key was already present.
            if !seen.insert(*cache_key) {
                repeated.insert(*cache_key);
            }
        }
    }

    let mut plans = Vec::with_capacity(sections.len());
    for section in sections {
        let plan = match section {
            TemplateSection::Literal(_) => CompiledSectionPlan::Literal,
            TemplateSection::Template { ops, cache_key } => {
                let cache_policy = if repeated.contains(cache_key) {
                    CachePolicy::PerCall
                } else {
                    CachePolicy::Never
                };
                CompiledSectionPlan::Template {
                    exec: TemplateExecutionPlan {
                        kind: Self::compile_template_execution_kind(ops),
                        cache_policy,
                    },
                    cache_key: *cache_key,
                }
            }
        };
        plans.push(plan);
    }
    plans
}
/// Chooses the execution path for an operation list.
///
/// * no operations → `Passthrough`
/// * exactly one split selecting a single index → `SplitIndex`
/// * a full-range split followed by a join → `SplitJoinRewrite`
/// * anything else → `Generic`
fn compile_template_execution_kind(ops: &[StringOp]) -> TemplateExecutionKind {
    match ops {
        [] => TemplateExecutionKind::Passthrough,
        [
            StringOp::Split {
                sep,
                range: RangeSpec::Index(idx),
            },
        ] => TemplateExecutionKind::SplitIndex {
            sep: sep.clone(),
            idx: *idx,
        },
        [
            StringOp::Split {
                sep: split_sep,
                range,
            },
            StringOp::Join { sep: join_sep },
        ] if Self::is_full_range(range) => TemplateExecutionKind::SplitJoinRewrite {
            split_sep: split_sep.clone(),
            join_sep: join_sep.clone(),
        },
        _ => TemplateExecutionKind::Generic,
    }
}
fn estimate_output_capacity(&self, input: &str) -> usize {
self.sections
.iter()
.map(|section| match section {
TemplateSection::Literal(text) => text.len(),
TemplateSection::Template { .. } => 0,
})
.sum::<usize>()
+ input.len()
}
/// Combined byte length of all literal sections; template sections
/// contribute nothing.
fn literal_output_capacity(&self) -> usize {
    self.sections
        .iter()
        .filter_map(|section| match section {
            TemplateSection::Literal(text) => Some(text.len()),
            TemplateSection::Template { .. } => None,
        })
        .sum()
}
/// Hashes an input string with the standard library's `DefaultHasher`.
/// The result forms the input half of a cache key.
fn hash_input(input: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    state.finish()
}
/// Fast path for "split by `split_sep`, then join with `join_sep`":
/// replaces every occurrence of one separator with the other.
///
/// The input is returned unchanged when `split_sep` is empty or both
/// separators are equal. A one-byte split separator is located with
/// `memchr`; longer separators fall back to `str::replace`.
#[inline]
fn fast_split_join(&self, input: &str, split_sep: &str, join_sep: &str) -> String {
    if split_sep.is_empty() || split_sep == join_sep {
        return input.to_owned();
    }

    let [needle] = split_sep.as_bytes() else {
        return input.replace(split_sep, join_sep);
    };
    let needle = *needle;
    let haystack = input.as_bytes();

    // Size the output once: the length is unchanged for a one-byte join
    // separator, otherwise it is adjusted per separator occurrence.
    let capacity = match join_sep.len() {
        1 => input.len(),
        join_len => {
            let hits = memchr_iter(needle, haystack).count();
            input.len() + hits.saturating_mul(join_len.saturating_sub(1))
        }
    };

    let mut rewritten = String::with_capacity(capacity);
    let mut cursor = 0usize;
    for hit in memchr_iter(needle, haystack) {
        rewritten.push_str(&input[cursor..hit]);
        rewritten.push_str(join_sep);
        cursor = hit + 1;
    }
    rewritten.push_str(&input[cursor..]);
    rewritten
}
/// Fast path for "split by `sep` and take part `idx`".
///
/// An empty separator is delegated to the shared split cache and
/// `apply_range`. Otherwise the parts are counted, `idx` is resolved against
/// that count (negative values count from the end, out-of-range values are
/// clamped) and only the selected part is copied out.
#[inline]
fn fast_split_index(&self, input: &str, sep: &str, idx: isize) -> String {
    match sep.len() {
        0 => {
            let parts = get_cached_split(input, sep);
            let wanted = RangeSpec::Index(idx);
            apply_range(&parts, &wanted)
                .into_iter()
                .next()
                .unwrap_or_default()
        }
        1 => {
            let sep_byte = sep.as_bytes()[0];
            let separator_hits = memchr_iter(sep_byte, input.as_bytes()).count();
            let target = Self::resolve_split_index(idx, separator_hits + 1);
            Self::split_index_single_byte(input, sep_byte, target)
        }
        _ => {
            let part_count = input.matches(sep).count() + 1;
            let target = Self::resolve_split_index(idx, part_count);
            match input.split(sep).nth(target) {
                Some(part) => part.to_owned(),
                None => String::new(),
            }
        }
    }
}
/// Returns the `target_idx`-th part (zero-based) of `input` split on the
/// single byte `sep_byte`.
///
/// When `target_idx` is at or beyond the number of separators, the text
/// after the last separator is returned.
#[inline]
fn split_index_single_byte(input: &str, sep_byte: u8, target_idx: usize) -> String {
    let mut part_start = 0usize;
    let mut parts_to_skip = target_idx;
    for sep_pos in memchr_iter(sep_byte, input.as_bytes()) {
        if parts_to_skip == 0 {
            return input[part_start..sep_pos].to_owned();
        }
        parts_to_skip -= 1;
        part_start = sep_pos + 1;
    }
    input[part_start..].to_owned()
}
/// Converts a possibly negative part index into a valid zero-based index.
///
/// Negative values count from the end (`-1` is the last part). Results are
/// clamped into `0..parts_len`, so out-of-range requests resolve to the
/// first or last part. For `parts_len == 0` the result is `0` rather than a
/// panic.
#[inline]
fn resolve_split_index(idx: isize, parts_len: usize) -> usize {
    // Working with the magnitude in `usize` avoids any `usize -> isize` cast
    // and cannot overflow, even for `isize::MIN`.
    let magnitude = idx.unsigned_abs();
    if idx < 0 {
        parts_len.saturating_sub(magnitude)
    } else {
        magnitude.min(parts_len.saturating_sub(1))
    }
}
/// Returns `true` only for the unbounded, non-inclusive range, i.e. a range
/// with neither a start nor an end.
#[inline]
fn is_full_range(range: &RangeSpec) -> bool {
    match range {
        RangeSpec::Range(None, None, false) => true,
        RangeSpec::Range(..) | RangeSpec::Index(_) => false,
    }
}
fn format_operations_summary(ops: &[StringOp]) -> String {
ops.iter()
.map(|op| match op {
StringOp::Split { sep, range } => format!(
"split('{sep}', {})",
match range {
RangeSpec::Index(i) => i.to_string(),
RangeSpec::Range(s, e, inc) => match (s, e) {
(None, None) => "..".into(),
(Some(s), None) => format!("{s}.."),
(None, Some(e)) => {
if *inc {
format!("..={e}")
} else {
format!("..{e}")
}
}
(Some(s), Some(e)) => {
let dots = if *inc { "..=" } else { ".." };
format!("{s}{dots}{e}")
}
},
}
),
StringOp::Upper => "upper".into(),
StringOp::Lower => "lower".into(),
StringOp::Append { suffix } => format!("append('{suffix}')"),
StringOp::Prepend { prefix } => format!("prepend('{prefix}')"),
StringOp::Replace {
pattern,
replacement,
..
} => format!("replace('{pattern}' → '{replacement}')"),
_ => format!("{op:?}").to_lowercase(),
})
.collect::<Vec<_>>()
.join(" | ")
}
/// Wraps an operation list in a template section by delegating to
/// `TemplateSection::from_ops`.
fn make_template_section(ops: Vec<StringOp>) -> TemplateSection {
TemplateSection::from_ops(ops)
}
/// Hashes an operation list with the standard library's `DefaultHasher`.
/// The result is the section's cache key.
fn hash_ops(ops: &[StringOp]) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(ops, &mut state);
    state.finish()
}
/// Attempts to treat `template` as exactly one `{...}` block.
///
/// Returns `Ok(None)` when the text is not wrapped in one outer pair of
/// braces, or when the braces between them are unbalanced or close more
/// than they open. Otherwise the whole text is parsed as one template
/// section and the debug flag reported by the parser is kept.
///
/// # Errors
///
/// Returns the parser's error message when the block itself is invalid.
fn try_single_block(template: &str) -> Result<Option<Self>, String> {
    let inner = match template
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'))
    {
        Some(inner) => inner,
        None => return Ok(None),
    };

    // Track nesting depth; `checked_sub` yields `None` on a closing brace at
    // depth zero, which aborts the scan.
    let final_depth = inner.chars().try_fold(0u32, |depth, ch| match ch {
        '{' => Some(depth + 1),
        '}' => depth.checked_sub(1),
        _ => Some(depth),
    });
    if final_depth != Some(0) {
        return Ok(None);
    }

    let (ops, parsed_debug) = parser::parse_template(template)?;
    let sections = vec![Self::make_template_section(ops)];
    Ok(Some(Self::new(template.to_owned(), sections, parsed_debug)))
}
}
/// Displays a template as the source text it was parsed from.
impl Display for Template {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.raw)
    }
}
/// Deprecated alias for [`Template`], kept so existing code keeps compiling.
#[deprecated(
since = "0.14.0",
note = "use `Template` instead; `MultiTemplate` will be removed in the next major release"
)]
pub type MultiTemplate = Template;