substitute 0.2.0

extremely simple string templating
Documentation
//! trivial string templates based on [the FORTH word of the same name][1].
//!
//! the primary advantage over `str::replace` is that it can perform multiple replacements while only allocating one new string.
//!
//! a template is a string that contains several *substitutions*.
//!
//! a substitution starts with '%', then has any number of characters in its name, then ends with '%'.
//! substitution names are case-sensitive.
//!
//! a [`Substituter`] maps a substitution name to its replacement.
//!
//! the substitution `%%` always has the replacement `%`.  this is to allow escaping of literal percentage signs.
//!
//! has `no_std` support, and can support `str` and `[u8]` templates.
//!
//! [1]: <https://forth-standard.org/standard/string/SUBSTITUTE>

#![forbid(unsafe_code)]
#![deny(clippy::implicit_hasher)]
#![warn(clippy::pedantic)]
#![no_std]

#[cfg(feature = "alloc")]
extern crate alloc;
#[cfg(feature = "std")]
extern crate std;

#[cfg(feature = "alloc")]
use alloc::string::{String, ToString};
#[cfg(feature = "std")]
use std::collections::HashMap;
#[cfg(feature = "std")]
use core::borrow::Borrow;
#[cfg(feature = "std")]
use core::hash::Hash;
use core::iter::Iterator;
use core::fmt;
use core::convert::Infallible;
use core::ops::{Index, Range};

// TODO: nightly feature to support BorrowedCursor as Output

/// where in the template an error was detected, plus (with the `alloc`
/// feature) an excerpt of the surrounding text so the error can be rendered
/// with a row of carets underneath the offending range.
#[derive(Debug, Clone)]
struct ErrorContext {
    /// byte offset of the highlighted range from the start of the template
    absolute_offset: usize,
    /// byte offset of the highlighted range from the start of the excerpt
    relative_offset: usize,
    /// name of the file the template came from; `None` until one is attached
    #[cfg(feature = "alloc")]
    filename: Option<String>,
    /// 1-indexed line number within the template string
    lineno: u32,
    // TODO: "column" field that holds the offset within a given line
    /// length of the highlighted range
    length: usize,
    /// excerpt of the template surrounding the highlighted range
    #[cfg(feature = "alloc")]
    nearby: String,
}

impl ErrorContext {
    /// build the context for the `length` bytes of `src` starting at `offset`.
    ///
    /// the excerpt reaches back at most 15 bytes before `offset` and forward
    /// between 15 and 100 bytes after it (depending on `length`), clipped to
    /// the bounds of `src`.  it never includes an ASCII control character, so
    /// it always renders on a single line.  `length` is shortened if the
    /// highlighted range would run past the end of the excerpt.
    ///
    /// offsets and lengths are counted in bytes.
    #[cold]
    fn new(src: &[u8], offset: usize, mut length: usize) -> Self {
        let isctrl = |c: &u8| c.is_ascii_control();

        let mut start = offset.saturating_sub(15);
        if let Some(lidx) = src[start..offset].iter().rposition(isctrl) {
            // begin just *after* the control character.  starting on it would
            // put a line break at the front of the excerpt and push the caret
            // line one column too far to the right.
            start += lidx + 1;
        }

        let mut end = (offset.saturating_add(length.clamp(15, 100))).min(src.len());
        if let Some(lidx) = src[offset..end].iter().position(isctrl) {
            // stop just before the first control character after `offset`.
            end = offset + lidx;
        }

        // one more than the number of newlines before `offset`, saturating if
        // the count does not fit in a u32.
        let lineno = src[..offset].split(|&b| b == b'\n').count().try_into().unwrap_or(u32::MAX);
        length = length.min(end - offset);
        // TODO: strip whitespace.
        ErrorContext {
            absolute_offset: offset,
            relative_offset: offset - start,
            #[cfg(feature = "alloc")]
            nearby: String::from_utf8_lossy(&src[start..end]).to_string(),
            #[cfg(feature = "alloc")]
            filename: None,
            length,
            lineno,
        }
    }

    /// the filename attached to this context, if any.
    #[cfg(feature = "alloc")]
    pub fn filename(&self) -> Option<&str> {
        self.filename.as_deref()
    }

    /// without `alloc` there is nowhere to store a filename, so this is
    /// always `None`.
    #[cfg(not(feature = "alloc"))]
    #[allow(clippy::unused_self)]
    pub fn filename(&self) -> Option<&str> {
        None
    }
}

/// renders the location header, then (with the `alloc` feature) the excerpt,
/// then a line of carets underlining the highlighted range.
///
/// the alternate flag (`{:#}`) adds the absolute byte offset to the header.
impl fmt::Display for ErrorContext {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // header: "at <file>:<line>" or "at line <line>"
        f.write_str("at ")?;
        match self.filename() {
            Some(name) => write!(f, "{name}:")?,
            None => f.write_str("line ")?,
        }
        if f.alternate() {
            writeln!(f, "{} (byte offset {})", self.lineno, self.absolute_offset)?;
        } else {
            writeln!(f, "{}", self.lineno)?;
        }

        #[cfg(feature = "alloc")]
        {
            writeln!(f, "{}", self.nearby)?;
        }

        // indent up to the highlighted range, then underline it.
        (0..self.relative_offset).try_for_each(|_| f.write_str(" "))?;
        (0..self.length).try_for_each(|_| f.write_str("^"))?;
        f.write_str("\n")
    }
}

/// the category of failure reported by an [`Error`].
///
/// `E` is the error type carried by the [`ErrorKind::Output`] variant; it
/// defaults to [`Infallible`].
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq)]
pub enum ErrorKind<E = Infallible> {
	/// the template string has an odd number of '%' bytes
	UnmatchedPercent,
	/// the template string contains a substitution name that is not known
	/// to the substituter.
	UnknownSubstitution,
    /// an error occurred writing to the output
    Output(E),
}

impl<E: fmt::Display + fmt::Debug> fmt::Display for ErrorKind<E> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		write!(f, "{}", match self {
			ErrorKind::UnmatchedPercent => "unmatched percent sign",
			ErrorKind::UnknownSubstitution => "unknown substitution name",
            ErrorKind::Output(err) => return write!(f, "error writing to output: {err:?}"),
		})
	}
}

/// an error that occurred during substitution.
///
/// pairs what went wrong (an [`ErrorKind`]) with where in the template it
/// was detected.
#[derive(Debug, Clone)]
pub struct Error<E = Infallible> {
	/// what went wrong
	kind: ErrorKind<E>,
	/// where in the template it went wrong
	cx: ErrorContext,
}

/// format the error nicely across multiple lines.
///
/// use the `{:#}` (alternate) flag to also display the byte offset within the
/// template the error occurred at.
///
/// the exact format is semver exempt, and should not be parsed.
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		write!(f, "error expanding template: {} ", self.kind)?;
		// use Display::fmt here instead of write! in order to preserve the
		// formatting flags.
		self.cx.fmt(f)
	}
}

impl Error {
	#[must_use = "pure function with no side effects"]
	pub fn kind(&self) -> &ErrorKind {
		&self.kind
	}

	/// attach filename and line number position info to an error message.
	///
	/// usually used with [`Result::map_err`].
	///
	/// ```rust
	/// use substitute::substitute;
	/// let r = substitute("%notfound%", &()).map_err(|e| e.with_pos(file!(), line!()));
	///
	/// assert_eq!(r.unwrap_err().to_string(), "\
	/// error expanding template: unknown substitution name at src/lib.rs:6
	/// %notfound%
	///  ^^^^^^^^
	/// ");
	/// ```
	#[must_use = "pure function with no side effects"]
    #[cfg(feature = "alloc")]
	pub fn with_pos(self, filename: &str, lineno: u32) -> Self {
		Self {
			cx: ErrorContext{
				filename: Some(filename.to_string()),
				// offset the line number, taking into account the fact it
				// is 1-indexed.
				lineno: self.cx.lineno - 1 + lineno,
				.. self.cx
			},
			.. self
		}
	}
}


/// the core substitution function.
///
/// for each `%replacement%`, it calls `sub.lookup_replacement`.
pub fn substitute_into<'a, T, O, S>(template: &T, sub: &'a S, out: &mut O) -> Result<(), Error<O::Error>> where
    T: AsRef<[u8]> + 'a + ?Sized,
    T: Index<Range<usize>, Output = T>,
    S: Substituter<'a, T> + ?Sized,
    O: Output<T>,
    str: AsRef<T>,
{
	// position after the last '%' (the start of the current chunk)
	let mut start_pos = 0;
	let mut in_name = false;
	// note that using .chars().enumerate() here would be incorrect due to mixing char and byte indexes.
	for (i, &b) in template.as_ref().iter().enumerate() {
        let err_cx = || ErrorContext::new(template.as_ref(), start_pos, i-start_pos);
        let out_err = |err| Error{
            kind: ErrorKind::Output(err),
            cx: err_cx(),
        };
		if b == b'%' {
            let slice = &template[start_pos..i];
			if in_name {
                if start_pos == i {
                    out.append_to_output("%".as_ref()).map_err(out_err)?;
				} else if let Some(rep) = sub.lookup_replacement(slice) {
                    out.append_to_output(rep).map_err(out_err)?;
                } else {
					return Err(Error{
						kind: ErrorKind::UnknownSubstitution,
						cx: err_cx(),
					});
				}
			} else {
				out.append_to_output(slice).map_err(out_err)?;
			}
			start_pos = i + 1;
			in_name = !in_name;
		}
	}
	if in_name {
		Err(Error{
			kind: ErrorKind::UnmatchedPercent,
			cx: ErrorContext::new(template.as_ref(), start_pos-1, 1),
		})
	} else {
        let len = template.as_ref().len();
		out.append_to_output(&template[start_pos..len]).map_err(|err| Error{
            kind: ErrorKind::Output(err),
            cx: ErrorContext::new(template.as_ref(), start_pos, len),
        })?;
		Ok(())
	}
}

/// expand `template` into a freshly allocated `String`, replacing every
/// substitution with the replacement given by the substituter.
///
/// the result buffer is pre-sized to the length of the template.
///
/// # Errors
///
/// fails if the template has an unmatched `%` or names a substitution that
/// `sub` does not know.
#[cfg(feature = "alloc")]
pub fn substitute_string<'a, S: Substituter<'a> + ?Sized>(template: &str, sub: &'a S) -> Result<String, Error> {
    let mut rendered = String::with_capacity(template.len());
    match substitute_into(template, sub, &mut rendered) {
        Ok(()) => Ok(rendered),
        Err(err) => Err(err),
    }
}


/// shorter name for [`substitute_string`].
#[cfg(feature = "alloc")]
pub use crate::substitute_string as substitute;

/// a source of replacements, looked up by substitution name.
///
/// `T` is the type of the template (and of names and replacements); it
/// defaults to `str`.  the returned replacement borrows from the substituter
/// for the lifetime `'a`.
pub trait Substituter<'a, T: ?Sized = str> {
	/// map a substitution name to its replacement.
	///
	/// returns None when given an unknown substitution name.
	///
	/// note that replacements are not cached: if a lookup corresponds
	/// to an expensive operation like i/o, it should be cached internally
	/// for optimal performance.
	fn lookup_replacement(&'a self, name: &T) -> Option<&'a T>;
}

/// a destination that expanded template text is appended to, piece by piece.
///
/// `T` is the type of each piece; it defaults to `str`.
pub trait Output<T: ?Sized = str> {
    /// the error returned when appending fails.
    type Error;

    /// append `section` to the end of the output.
    fn append_to_output(&mut self, section: &T) -> Result<(), Self::Error>;
}

/// a `HashMap` acts as a Substituter: keys are substitution names, values are
/// their replacements.
#[cfg(feature = "std")]
impl<'a, K, V, S> Substituter<'a> for HashMap<K, V, S> where
    K: Borrow<str> + Eq + Hash + 'a,
    V: AsRef<str> + 'a,
    S: std::hash::BuildHasher,
{
    fn lookup_replacement(&'a self, name: &str) -> Option<&'a str> {
        let replacement = self.get(name)?;
        Some(replacement.as_ref())
    }
}

/// a Substituter that interprets a slice of `(name, replacement)` pairs as a
/// key-value map.
///
/// pairs are searched in order, and the first pair whose name matches wins.
///
/// this may be faster than using a `HashMap` when you have a small number of
/// substitutions, as it skips a heap allocation.
impl<'a, S: AsRef<str> + 'a> Substituter<'a> for [(S, S)] {
    fn lookup_replacement(&'a self, name: &str) -> Option<&'a str> {
        self.iter()
            .find(|(key, _)| key.as_ref() == name)
            .map(|(_, value)| value.as_ref())
    }
}

/// a fixed-size array of pairs behaves exactly like the equivalent slice.
impl<'a, S: AsRef<str> + 'a, const N: usize> Substituter<'a> for [(S, S); N] {
    fn lookup_replacement(&'a self, name: &str) -> Option<&'a str> {
        // view the array as a slice and defer to the slice implementation.
        let pairs: &'a [(S, S)] = self;
        pairs.lookup_replacement(name)
    }
}

/// empty substituter.
///
/// knows no substitution names at all: every lookup returns `None`.
impl<'a, T: ?Sized> Substituter<'a, T> for () {
	fn lookup_replacement(&'a self, _name: &T) -> Option<&'a T> {
		None
	}
}

/// a `String` collects the output of a `str` template.
///
/// appending cannot fail, hence the `Infallible` error type.
#[cfg(feature = "alloc")]
impl Output<str> for String {
    type Error = Infallible;

    fn append_to_output(&mut self, section: &str) -> Result<(), Infallible> {
        self.push_str(section);
        Ok(())
    }
}

/// a `Vec<u8>` collects the output of a `[u8]` template.
///
/// appending cannot fail, hence the `Infallible` error type.
#[cfg(feature = "alloc")]
impl Output<[u8]> for alloc::vec::Vec<u8> {
    type Error = Infallible;

    fn append_to_output(&mut self, section: &[u8]) -> Result<(), Infallible> {
        self.extend_from_slice(section);
        Ok(())
    }
}


// TODO: benchmark against a full templating library
// TODO: substituter that caches the results of another substituter.
// TODO: substituter that wraps around a Serialize struct? or use own derive macro?
// TODO: Output that wraps io::Write

// the tests build owned strings and use `substitute`, so they need `alloc`.
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::*;
    use alloc::format;

    /// happy path: named substitutions from a `HashMap`, plus the `%%` escape.
    #[test]
    #[cfg(feature = "std")]
    fn it_works() {
        let templ1 = "Greetings, %name%, it is %weekday%, and I'm feeling 100%%";
		let sub1 = HashMap::from([("name".to_string(), "Alice"), ("weekday".to_string(), "Monday")]);
        assert_eq!(substitute(templ1, &sub1).unwrap(), "Greetings, Alice, it is Monday, and I'm feeling 100%");
    }

	/// both error kinds, including the exact rendered message and caret line.
	#[test]
	fn errors() {
		let bad_templ1 = "Hi, %name";
		let templ1 = "Hi, %name%";
		// unterminated name, looked up through a slice substituter: the
		// caret points at the opening '%'.
		let err1 = substitute(bad_templ1, &[("name", "Bob")][..]).unwrap_err();
		assert_eq!(err1.kind(), &ErrorKind::UnmatchedPercent);
		assert_eq!(err1.to_string(),
				   r"error expanding template: unmatched percent sign at line 1
Hi, %name
    ^
");
		// unknown name, looked up through an array substituter: the carets
		// underline the name, and the alternate flag adds the byte offset.
		let err2 = substitute(templ1, &[("lastname", "Smith")]).unwrap_err();
		assert_eq!(err2.kind(), &ErrorKind::UnknownSubstitution);
		assert_eq!(format!("{err2:#}"),
				   r"error expanding template: unknown substitution name at line 1 (byte offset 5)
Hi, %name%
     ^^^^
");
	}

	// TODO: property based testing. (eg. if there are an even number of '%' signs, there should never be an UnmatchedPercent error).
}