use std::collections::{HashMap, hash_map::DefaultHasher};
use std::fmt::Display;
use std::hash::{Hash, Hasher};
use crate::pipeline::get_cached_split;
use crate::pipeline::{DebugTracer, RangeSpec, StringOp, apply_ops_internal, apply_range, parser}; use memchr::memchr_iter;
/// A parsed template: an ordered list of literal and template sections.
///
/// Instances are built by [`MultiTemplate::parse`] or
/// [`MultiTemplate::parse_with_debug`] and rendered with
/// [`MultiTemplate::format`] or [`MultiTemplate::format_with_inputs`].
#[derive(Debug, Clone)]
pub struct MultiTemplate {
/// The template text exactly as it was handed to the parser.
raw: String,
/// Parsed sections, in the order they are emitted when formatting.
sections: Vec<TemplateSection>,
/// When `true`, `format` traces its work through a `DebugTracer`.
debug: bool,
}
/// One piece of a parsed template.
#[derive(Debug, Clone)]
pub enum TemplateSection {
/// Text that is copied to the output unchanged.
Literal(String),
/// A list of operations applied to the input. `cache_key` identifies the
/// operation list in the per-call result cache; `from_ops` derives it by
/// hashing `ops`.
Template { ops: Vec<StringOp>, cache_key: u64 },
}
impl TemplateSection {
    /// Wraps `ops` in a [`TemplateSection::Template`], deriving the section's
    /// cache key from a hash of the operation list.
    pub(crate) fn from_ops(ops: Vec<StringOp>) -> Self {
        // Fields are evaluated in written order, so the borrow for hashing
        // ends before `ops` is moved into the variant.
        Self::Template {
            cache_key: MultiTemplate::hash_ops(&ops),
            ops,
        }
    }
}
/// Discriminates the two kinds of section reported by [`SectionInfo`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionType {
/// The section is literal text.
Literal,
/// The section is a list of operations.
Template,
}
/// Description of a single section, as returned by
/// `MultiTemplate::get_section_info`.
#[derive(Debug, Clone)]
pub struct SectionInfo {
/// Whether this entry describes a literal or a template section.
pub section_type: SectionType,
/// Zero-based index of the section among all sections.
pub overall_position: usize,
/// Zero-based index among template sections only; `None` for literals.
pub template_position: Option<usize>,
/// The literal text; `None` for template sections.
pub content: Option<String>,
/// A copy of the section's operations; `None` for literals.
pub operations: Option<Vec<StringOp>>,
}
/// Cache of already formatted template-section outputs.
///
/// The formatting entry points create a fresh cache on every call.
struct TemplateCache {
/// Formatted output keyed by (input hash, section key).
operations: HashMap<CacheKey, String>,
}
impl TemplateCache {
    /// Creates a cache with no stored results.
    fn new() -> Self {
        let operations = HashMap::default();
        Self { operations }
    }
}
/// Key of a [`TemplateCache`] entry.
///
/// Only hashes are stored, not the input text itself, so two different
/// inputs whose 64-bit hashes are equal would map to the same entry.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
struct CacheKey {
/// Hash of the input string being formatted.
input_hash: u64,
/// The template section's `cache_key`.
section_key: u64,
}
impl MultiTemplate {
/// Assembles a `MultiTemplate` from its already parsed parts.
fn new(raw: String, sections: Vec<TemplateSection>, debug: bool) -> Self {
Self {
raw,
sections,
debug,
}
}
/// Parses `template` into a `MultiTemplate`.
///
/// A template that consists of exactly one `{...}` block is parsed by
/// `try_single_block` and keeps the debug flag that parser reports. Any
/// other template goes through `parser::parse_multi_template`; on that path
/// the parser's debug flag is discarded and debugging starts disabled.
///
/// # Errors
/// Returns the parser's error message when the template is invalid.
pub fn parse(template: &str) -> Result<Self, String> {
    match Self::try_single_block(template)? {
        Some(single) => Ok(single),
        None => {
            let (sections, _) = parser::parse_multi_template(template)?;
            Ok(Self::new(template.to_owned(), sections, false))
        }
    }
}
/// Parses `template`, optionally overriding the debug flag.
///
/// `Some(flag)` forces the debug setting to `flag`; `None` keeps whatever
/// flag the parser reported for the template.
///
/// # Errors
/// Returns the parser's error message when the template is invalid.
pub fn parse_with_debug(template: &str, debug: Option<bool>) -> Result<Self, String> {
    match Self::try_single_block(template)? {
        Some(mut single) => {
            // Without an override the flag parsed from the block stays as is.
            single.debug = debug.unwrap_or(single.debug);
            Ok(single)
        }
        None => {
            let (sections, inner_dbg) = parser::parse_multi_template(template)?;
            let effective_debug = debug.unwrap_or(inner_dbg);
            Ok(Self::new(template.to_owned(), sections, effective_debug))
        }
    }
}
/// Formats `input` by emitting every section in order.
///
/// Literal sections are appended verbatim. Template sections are rendered
/// by `apply_template_section`, sharing one cache that lives for the
/// duration of this call. When the debug flag is set, the run is traced
/// through a `DebugTracer`, including a short preview of each literal.
///
/// # Errors
/// Returns the first error produced while applying a template section.
pub fn format(&self, input: &str) -> Result<String, String> {
    use std::time::Instant;

    // Builds the short literal description shown in debug traces.
    // Truncation works on characters, never on raw byte offsets, so a
    // multi-byte UTF-8 character cannot be split (byte slicing would panic).
    fn literal_preview(text: &str) -> String {
        if text.trim().is_empty() && text.len() <= 2 {
            return "whitespace".to_string();
        }
        let char_count = text.chars().count();
        if char_count <= 20 {
            format!("'{text}'")
        } else {
            let head: String = text.chars().take(15).collect();
            format!("'{head}...' ({char_count} chars)")
        }
    }

    let mut cache = TemplateCache::new();
    let mut result = String::new();
    let mut hasher = DefaultHasher::new();
    input.hash(&mut hasher);
    let input_hash = hasher.finish();

    if !self.debug {
        for section in &self.sections {
            match section {
                TemplateSection::Literal(text) => result.push_str(text),
                TemplateSection::Template { ops, cache_key } => {
                    let out = self.apply_template_section(
                        input, ops, *cache_key, input_hash, &mut cache, &None,
                    )?;
                    result.push_str(&out);
                }
            }
        }
        return Ok(result);
    }

    // Debug path: the timer is only needed here, so no Option/unwrap dance.
    let started = Instant::now();
    let tracer = DebugTracer::new(true);
    let total = self.sections.len();
    let template_count = self.template_section_count();
    let info = format!(
        "{} sections (literal: {}, template: {})",
        total,
        total - template_count,
        template_count
    );
    tracer.session_start("MULTI-TEMPLATE", &self.raw, input, Some(&info));
    for (idx, section) in self.sections.iter().enumerate() {
        match section {
            TemplateSection::Literal(text) => {
                let preview = literal_preview(text);
                tracer.section(idx + 1, total, "literal", &preview);
                result.push_str(text);
                if idx + 1 < total {
                    tracer.separator();
                }
            }
            TemplateSection::Template { ops, cache_key } => {
                let summary = Self::format_operations_summary(ops);
                tracer.section(idx + 1, total, "template", &summary);
                let out = self.apply_template_section(
                    input,
                    ops,
                    *cache_key,
                    input_hash,
                    &mut cache,
                    &Some(&tracer),
                )?;
                result.push_str(&out);
            }
        }
    }
    tracer.session_end("MULTI-TEMPLATE", &result, started.elapsed());
    Ok(result)
}
/// Returns the template text stored in this instance.
pub fn template_string(&self) -> &str {
&self.raw
}
/// Returns the total number of sections, literal and template alike.
pub fn section_count(&self) -> usize {
self.sections.len()
}
/// Returns how many sections are template (operation) sections.
pub fn template_section_count(&self) -> usize {
    let mut templates = 0;
    for section in &self.sections {
        if let TemplateSection::Template { .. } = section {
            templates += 1;
        }
    }
    templates
}
/// Returns whether debug tracing is enabled for this template.
pub fn is_debug(&self) -> bool {
self.debug
}
/// Consumes the template and returns it with the debug flag set to `debug`.
pub fn with_debug(mut self, debug: bool) -> Self {
self.debug = debug;
self
}
/// Sets the debug flag in place.
pub fn set_debug(&mut self, debug: bool) {
self.debug = debug;
}
pub fn format_with_inputs(
&self,
inputs: &[&[&str]],
separators: &[&str],
) -> Result<String, String> {
let template_sections_count = self.template_section_count();
let adjusted_inputs: Vec<&[&str]> = (0..template_sections_count)
.map(|i| {
if i < inputs.len() {
inputs[i] } else {
&[] as &[&str] }
})
.collect();
let adjusted_separators: Vec<&str> = (0..template_sections_count)
.map(|i| {
if i < separators.len() {
separators[i] } else {
" " }
})
.collect();
let inputs = &adjusted_inputs;
let separators = &adjusted_separators;
let mut result = String::new();
let mut template_index = 0;
let mut cache = TemplateCache::new();
for section in &self.sections {
match section {
TemplateSection::Literal(text) => {
result.push_str(text);
}
TemplateSection::Template { ops, cache_key } => {
if template_index >= inputs.len() {
return Err("Internal error: template index out of bounds".to_string());
}
let section_inputs = inputs[template_index];
let separator = separators[template_index];
let output = match section_inputs.len() {
0 => String::new(),
1 => {
let mut input_hasher = std::collections::hash_map::DefaultHasher::new();
std::hash::Hash::hash(§ion_inputs[0], &mut input_hasher);
let input_hash = input_hasher.finish();
self.apply_template_section(
section_inputs[0],
ops,
*cache_key,
input_hash,
&mut cache,
&None, )?
}
_ => {
let mut results = Vec::new();
for input in section_inputs {
let mut input_hasher =
std::collections::hash_map::DefaultHasher::new();
std::hash::Hash::hash(&input, &mut input_hasher);
let input_hash = input_hasher.finish();
let result = self.apply_template_section(
input, ops, *cache_key, input_hash, &mut cache,
&None, )?;
results.push(result);
}
results.join(separator)
}
};
result.push_str(&output);
template_index += 1;
}
}
}
Ok(result)
}
/// Returns every template section's operations paired with its zero-based
/// position among template sections (literal sections are skipped).
pub fn get_template_sections(&self) -> Vec<(usize, &Vec<StringOp>)> {
    self.sections
        .iter()
        .filter_map(|section| match section {
            TemplateSection::Template { ops, .. } => Some(ops),
            TemplateSection::Literal(_) => None,
        })
        .enumerate()
        .collect()
}
/// Describes every section in order.
///
/// Literal entries carry a copy of their text. Template entries carry a copy
/// of their operations plus their index among template sections.
pub fn get_section_info(&self) -> Vec<SectionInfo> {
    let mut next_template = 0usize;
    self.sections
        .iter()
        .enumerate()
        .map(|(overall_position, section)| match section {
            TemplateSection::Literal(text) => SectionInfo {
                section_type: SectionType::Literal,
                overall_position,
                template_position: None,
                content: Some(text.clone()),
                operations: None,
            },
            TemplateSection::Template { ops, .. } => {
                let template_position = Some(next_template);
                next_template += 1;
                SectionInfo {
                    section_type: SectionType::Template,
                    overall_position,
                    template_position,
                    content: None,
                    operations: Some(ops.clone()),
                }
            }
        })
        .collect()
}
/// Renders one template section for `input`.
///
/// Two operation shapes bypass the cache entirely:
/// * a lone `Split` goes to `fast_single_split`;
/// * `Split` over the full range followed by `Join` goes to
///   `fast_split_join`.
///
/// Everything else is looked up in `cache` under
/// `(input_hash, section_key)`. On a miss it is computed with
/// `apply_ops_internal` and stored. `dbg`, when present, receives a note
/// about which path was taken.
///
/// # Errors
/// Propagates the error returned by `apply_ops_internal`.
fn apply_template_section(
    &self,
    input: &str,
    ops: &[StringOp],
    section_key: u64,
    input_hash: u64,
    cache: &mut TemplateCache,
    dbg: &Option<&DebugTracer>,
) -> Result<String, String> {
    match ops {
        [StringOp::Split { sep, range }] => {
            if let Some(tracer) = dbg {
                tracer.cache_operation("FAST SPLIT", &format!("by '{sep}'"));
            }
            return Ok(self.fast_single_split(input, sep, range));
        }
        [
            StringOp::Split {
                sep: split_sep,
                range,
            },
            StringOp::Join { sep: join_sep },
        ] if Self::is_full_range(range) => {
            if let Some(tracer) = dbg {
                tracer.cache_operation("FAST SPLIT+JOIN", "direct separator rewrite");
            }
            return Ok(self.fast_split_join(input, split_sep, join_sep));
        }
        _ => {}
    }

    let key = CacheKey {
        input_hash,
        section_key,
    };
    if let Some(hit) = cache.operations.get(&key) {
        if let Some(tracer) = dbg {
            tracer.cache_operation("CACHE HIT", "re-using formatted section");
        }
        return Ok(hit.clone());
    }
    if let Some(tracer) = dbg {
        tracer.cache_operation("CACHE MISS", "computing section");
    }

    // The nested tracer exists only when this template is in debug mode.
    let nested_dbg = self.debug.then(|| DebugTracer::new(true));
    let rendered = apply_ops_internal(input, ops, self.debug, nested_dbg)?;
    cache.operations.insert(key, rendered.clone());
    Ok(rendered)
}
/// Shortcut for a section that is a single `Split` operation: splits
/// `input` on `sep`, keeps the parts selected by `range`, and glues the
/// survivors back together with the same separator.
#[inline]
fn fast_single_split(&self, input: &str, sep: &str, range: &RangeSpec) -> String {
    let parts = get_cached_split(input, sep);
    let selected = apply_range(&parts, range);
    let kept = selected.len();
    if kept == 0 {
        String::new()
    } else if kept == 1 {
        selected[0].clone()
    } else {
        selected.join(sep)
    }
}
/// Shortcut for "split on `split_sep`, then join with `join_sep`": rewrites
/// the separators directly instead of materialising the parts.
///
/// An empty `split_sep`, or one equal to `join_sep`, returns the input
/// unchanged.
#[inline]
fn fast_split_join(&self, input: &str, split_sep: &str, join_sep: &str) -> String {
    if split_sep.is_empty() || split_sep == join_sep {
        return input.to_owned();
    }

    // Multi-byte separators take the general replace path.
    let &[needle] = split_sep.as_bytes() else {
        return input.replace(split_sep, join_sep);
    };

    // A one-byte `&str` is a single ASCII character, so every match offset
    // is a valid char boundary for the slicing below.
    let haystack = input.as_bytes();
    let capacity = if join_sep.len() == 1 {
        input.len()
    } else {
        let hits = memchr_iter(needle, haystack).count();
        input.len() + hits.saturating_mul(join_sep.len().saturating_sub(1))
    };

    let mut rewritten = String::with_capacity(capacity);
    let mut cursor = 0usize;
    for hit in memchr_iter(needle, haystack) {
        rewritten.push_str(&input[cursor..hit]);
        rewritten.push_str(join_sep);
        cursor = hit + 1;
    }
    rewritten.push_str(&input[cursor..]);
    rewritten
}
/// Returns `true` only for `RangeSpec::Range(None, None, false)`, i.e. a
/// range with neither bound set and the inclusive flag off.
#[inline]
fn is_full_range(range: &RangeSpec) -> bool {
matches!(range, RangeSpec::Range(None, None, false))
}
fn format_operations_summary(ops: &[StringOp]) -> String {
ops.iter()
.map(|op| match op {
StringOp::Split { sep, range } => format!(
"split('{sep}', {})",
match range {
RangeSpec::Index(i) => i.to_string(),
RangeSpec::Range(s, e, inc) => match (s, e) {
(None, None) => "..".into(),
(Some(s), None) => format!("{s}.."),
(None, Some(e)) => {
if *inc {
format!("..={e}")
} else {
format!("..{e}")
}
}
(Some(s), Some(e)) => {
let dots = if *inc { "..=" } else { ".." };
format!("{s}{dots}{e}")
}
},
}
),
StringOp::Upper => "upper".into(),
StringOp::Lower => "lower".into(),
StringOp::Append { suffix } => format!("append('{suffix}')"),
StringOp::Prepend { prefix } => format!("prepend('{prefix}')"),
StringOp::Replace {
pattern,
replacement,
..
} => format!("replace('{pattern}' → '{replacement}')"),
_ => format!("{op:?}").to_lowercase(),
})
.collect::<Vec<_>>()
.join(" | ")
}
/// Builds a template section from `ops`; delegates to
/// `TemplateSection::from_ops`.
fn make_template_section(ops: Vec<StringOp>) -> TemplateSection {
TemplateSection::from_ops(ops)
}
/// Hashes an operation list with `DefaultHasher`; the result is used as the
/// section's cache key.
fn hash_ops(ops: &[StringOp]) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(ops, &mut state);
    state.finish()
}
/// Tries to treat `template` as one single `{...}` block.
///
/// Returns `Ok(None)` when the text is not wrapped in one outer brace pair,
/// or when the braces between the outer pair are unbalanced. Otherwise the
/// whole text is handed to `parser::parse_template` and wrapped as a
/// one-section template carrying the debug flag the parser reports.
///
/// # Errors
/// Propagates the error from `parser::parse_template`.
fn try_single_block(template: &str) -> Result<Option<Self>, String> {
    let Some(inner) = template
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'))
    else {
        return Ok(None);
    };

    // Track nesting between the outer braces. A `}` seen at depth zero
    // means the first `{` was closed early, so this is not a single block.
    let mut open = 0u32;
    for ch in inner.chars() {
        if ch == '{' {
            open += 1;
        } else if ch == '}' {
            if open == 0 {
                return Ok(None);
            }
            open -= 1;
        }
    }
    if open > 0 {
        return Ok(None);
    }

    let (ops, dbg_flag) = parser::parse_template(template)?;
    let sections = vec![Self::make_template_section(ops)];
    Ok(Some(Self::new(template.to_owned(), sections, dbg_flag)))
}
}
impl Display for MultiTemplate {
    /// Writes the stored template text as is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.raw)
    }
}
/// Alias that lets `Template` name the same type as [`MultiTemplate`].
pub type Template = MultiTemplate;