use std::borrow::{Borrow, Cow};
use std::fmt::{self, Debug, Display, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::{Add, AddAssign, Deref, Range};
use comemo::Tracked;
use ecow::EcoString;
use serde::{Deserialize, Serialize};
use unicode_segmentation::UnicodeSegmentation;
use crate::diag::{bail, At, SourceResult, StrResult};
use crate::engine::Engine;
use crate::foundations::{
cast, dict, func, repr, scope, ty, Array, Bytes, Context, Decimal, Dict, Func,
IntoValue, Label, Repr, Type, Value, Version,
};
use crate::layout::Alignment;
use crate::syntax::{Span, Spanned};
use crate::utils::PicoStr;
/// Builds a `Str` from format arguments.
///
/// All tokens are forwarded unchanged to `eco_format!`; the formatted result
/// is then converted with `Str::from`.
#[macro_export]
#[doc(hidden)]
macro_rules! __format_str {
    ($($args:tt)*) => {{
        $crate::foundations::Str::from($crate::foundations::eco_format!($($args)*))
    }};
}
#[doc(inline)]
pub use crate::__format_str as format_str;
#[doc(hidden)]
pub use ecow::eco_format;
/// A string of text.
// Newtype around `EcoString`. Equality, ordering and hashing are derived and
// therefore delegate to the wrapped string; `#[serde(transparent)]` makes the
// value (de)serialize exactly like the inner string.
#[ty(scope, cast, title = "String")]
#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[derive(Serialize, Deserialize)]
#[serde(transparent)]
pub struct Str(EcoString);
impl Str {
    /// Creates an empty string.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `true` if the string has a length of zero.
    pub fn is_empty(&self) -> bool {
        self.as_str().is_empty()
    }

    /// Concatenates `n` copies of this string.
    ///
    /// Returns an error when the byte length of the result would not fit
    /// into a `usize`.
    pub fn repeat(&self, n: usize) -> StrResult<Self> {
        match self.0.len().checked_mul(n) {
            Some(_) => Ok(Self(self.0.repeat(n))),
            None => Err(eco_format!("cannot repeat this string {n} times")),
        }
    }

    /// Borrows the contents as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Resolves a possibly negative index into a byte offset.
    ///
    /// Fails if the index is out of bounds or does not fall on a character
    /// boundary.
    fn locate(&self, index: i64) -> StrResult<usize> {
        match self.locate_opt(index)? {
            Some(offset) => Ok(offset),
            None => Err(out_of_bounds(index, self.len())),
        }
    }

    /// Resolves a possibly negative index into a byte offset.
    ///
    /// Negative indices count from the end of the string. Returns `Ok(None)`
    /// when the index is out of bounds (an offset equal to the length is
    /// still in bounds) and an error when the resolved offset is not on a
    /// character boundary.
    fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> {
        let len = self.0.len();
        let absolute =
            if index < 0 { (len as i64).checked_add(index) } else { Some(index) };

        let in_bounds = absolute
            .and_then(|v| usize::try_from(v).ok())
            .filter(|&v| v <= len);

        let Some(offset) = in_bounds else {
            return Ok(None);
        };

        if !self.0.is_char_boundary(offset) {
            return Err(not_a_char_boundary(index));
        }

        Ok(Some(offset))
    }
}
#[scope]
impl Str {
/// Converts a value to a string.
///
/// `value` is the value to convert. The named `base` argument (default 10)
/// is only honored for integers, where it must be between 2 and 36. For any
/// value that was already turned into a string by the cast, a base other
/// than 10 is an error.
#[func(constructor)]
pub fn construct(
    value: ToStr,
    #[named]
    #[default(Spanned::new(10, Span::detached()))]
    base: Spanned<i64>,
) -> SourceResult<Str> {
    let radix = base.v;
    match value {
        ToStr::Str(text) => {
            if radix != 10 {
                bail!(base.span, "base is only supported for integers");
            }
            Ok(text)
        }
        ToStr::Int(number) => {
            if !(2..=36).contains(&radix) {
                bail!(base.span, "base must be between 2 and 36");
            }
            Ok(repr::format_int_with_base(number, radix).into())
        }
    }
}
/// The length of the string in UTF-8 encoded bytes.
#[func(title = "Length")]
pub fn len(&self) -> usize {
    self.as_str().len()
}
#[func]
pub fn first(&self) -> StrResult<Str> {
self.0
.graphemes(true)
.next()
.map(Into::into)
.ok_or_else(string_is_empty)
}
#[func]
pub fn last(&self) -> StrResult<Str> {
self.0
.graphemes(true)
.next_back()
.map(Into::into)
.ok_or_else(string_is_empty)
}
/// Extracts the first grapheme cluster starting at the given byte index.
///
/// A negative `index` counts from the end of the string. When no cluster is
/// available at the index (it is out of bounds or points at the very end),
/// `default` is returned; without a default this is an error. An index that
/// is not on a character boundary is always an error.
#[func]
pub fn at(
    &self,
    index: i64,
    #[named]
    default: Option<Value>,
) -> StrResult<Value> {
    let cluster = match self.locate_opt(index)? {
        Some(offset) => {
            self.0[offset..].graphemes(true).next().map(|g| g.into_value())
        }
        None => None,
    };

    match cluster.or(default) {
        Some(value) => Ok(value),
        None => Err(no_default_and_out_of_bounds(index, self.len())),
    }
}
/// Extracts a substring of the string.
///
/// `start` is the inclusive and `end` the exclusive byte index; negative
/// indices count from the end of the string. When `end` is omitted, the
/// named `count` argument gives the number of bytes to extract instead; when
/// both are omitted, the slice extends to the end of the string. An `end`
/// that lies before `start` produces an empty string.
///
/// Fails with an error if either index is out of bounds or does not fall on
/// a character boundary.
#[func]
pub fn slice(
    &self,
    start: i64,
    #[default]
    end: Option<i64>,
    #[named]
    count: Option<i64>,
) -> StrResult<Str> {
    // Saturate rather than overflow: a huge `count` would otherwise panic in
    // debug builds or wrap around in release builds. The saturated value is
    // rejected below as an ordinary out-of-bounds index.
    let end = end
        .or(count.map(|c| start.saturating_add(c)))
        .unwrap_or(self.len() as i64);
    let start = self.locate(start)?;
    // Clamp so that the range below is never inverted.
    let end = self.locate(end)?.max(start);
    Ok(self.0[start..end].into())
}
#[func]
pub fn clusters(&self) -> Array {
self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
}
#[func]
pub fn codepoints(&self) -> Array {
self.chars().map(|c| Value::Str(c.into())).collect()
}
/// Converts a character into its corresponding code point.
#[func]
pub fn to_unicode(
    character: char,
) -> u32 {
    u32::from(character)
}
/// Converts a Unicode code point into the corresponding one-character
/// string.
///
/// Fails with an error if `value` is not a valid codepoint.
#[func]
pub fn from_unicode(
    value: u32,
) -> StrResult<Str> {
    match char::from_u32(value) {
        Some(c) => Ok(c.into()),
        None => Err(eco_format!("{value:#x} is not a valid codepoint")),
    }
}
/// Whether the string contains the specified pattern, which is either a
/// literal string or a regular expression.
#[func]
pub fn contains(
    &self,
    pattern: StrPattern,
) -> bool {
    let haystack = self.as_str();
    match &pattern {
        StrPattern::Str(needle) => haystack.contains(needle.as_str()),
        StrPattern::Regex(regex) => regex.is_match(haystack),
    }
}
/// Whether the string starts with the specified pattern.
///
/// For a regular expression, this holds when the first match found begins
/// at byte offset zero.
#[func]
pub fn starts_with(
    &self,
    pattern: StrPattern,
) -> bool {
    let haystack = self.as_str();
    match &pattern {
        StrPattern::Str(prefix) => haystack.starts_with(prefix.as_str()),
        StrPattern::Regex(regex) => {
            matches!(regex.find(haystack), Some(m) if m.start() == 0)
        }
    }
}
/// Whether the string ends with the specified pattern.
///
/// For a regular expression, the search is retried from one character past
/// the start of each match that does not reach the end of the string, until
/// a match ending exactly at the end is found or no match remains.
#[func]
pub fn ends_with(
    &self,
    pattern: StrPattern,
) -> bool {
    let haystack = self.as_str();
    match &pattern {
        StrPattern::Str(suffix) => haystack.ends_with(suffix.as_str()),
        StrPattern::Regex(regex) => {
            let mut from = 0;
            loop {
                let Some(found) = regex.find_at(haystack, from) else {
                    return false;
                };
                if found.end() == haystack.len() {
                    return true;
                }
                // Step past the first character of this match so that the
                // next search can find a later-starting match.
                match haystack[found.start()..].chars().next() {
                    Some(ch) => from = found.start() + ch.len_utf8(),
                    None => return false,
                }
            }
        }
    }
}
#[func]
pub fn find(
&self,
pattern: StrPattern,
) -> Option<Str> {
match pattern {
StrPattern::Str(pat) => self.0.contains(pat.as_str()).then_some(pat),
StrPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()),
}
}
/// Searches for the specified pattern and returns the byte index at which
/// the first match starts, or `none` if there is no match.
#[func]
pub fn position(
    &self,
    pattern: StrPattern,
) -> Option<usize> {
    let haystack = self.as_str();
    match pattern {
        StrPattern::Str(needle) => haystack.find(needle.as_str()),
        StrPattern::Regex(regex) => {
            let found = regex.find(haystack)?;
            Some(found.start())
        }
    }
}
#[func]
pub fn match_(
&self,
pattern: StrPattern,
) -> Option<Dict> {
match pattern {
StrPattern::Str(pat) => {
self.0.match_indices(pat.as_str()).next().map(match_to_dict)
}
StrPattern::Regex(re) => re.captures(self).map(captures_to_dict),
}
}
#[func]
pub fn matches(
&self,
pattern: StrPattern,
) -> Array {
match pattern {
StrPattern::Str(pat) => self
.0
.match_indices(pat.as_str())
.map(match_to_dict)
.map(Value::Dict)
.collect(),
StrPattern::Regex(re) => re
.captures_iter(self)
.map(captures_to_dict)
.map(Value::Dict)
.collect(),
}
}
/// Replaces at most `count` matches of the specified pattern with a
/// replacement and returns the resulting string.
///
/// The replacement is either a string that is inserted as-is or a function
/// that is called with the match dictionary and must return a string. When
/// `count` is omitted, all matches are replaced.
#[func]
pub fn replace(
&self,
// Engine and context are only needed to call a replacement function.
engine: &mut Engine,
context: Tracked<Context>,
// The pattern to search for.
pattern: StrPattern,
// The string or function that produces the replacement text.
replacement: Replacement,
// Upper limit on the number of replaced matches.
#[named]
count: Option<usize>,
) -> SourceResult<Str> {
// The output will often be about as long as the input.
let mut output = EcoString::with_capacity(self.as_str().len());
// Byte offset just past the most recently handled match.
let mut last_match = 0;
// Appends the unmatched text preceding `range`, followed by the
// replacement for the match itself.
let mut handle_match = |range: Range<usize>, dict: Dict| -> SourceResult<()> {
output.push_str(&self[last_match..range.start]);
last_match = range.end;
match &replacement {
Replacement::Str(s) => output.push_str(s),
Replacement::Func(func) => {
// The function's result must cast to a string; a failed cast is
// reported at the function's span.
let piece = func
.call(engine, context, [dict])?
.cast::<Str>()
.at(func.span())?;
output.push_str(&piece);
}
}
Ok(())
};
// Without an explicit count, there is effectively no limit.
let count = count.unwrap_or(usize::MAX);
match &pattern {
StrPattern::Str(pat) => {
for m in self.match_indices(pat.as_str()).take(count) {
let (start, text) = m;
handle_match(start..start + text.len(), match_to_dict(m))?;
}
}
StrPattern::Regex(re) => {
for caps in re.captures_iter(self).take(count) {
// Group 0 is the overall match.
let m = caps.get(0).unwrap();
handle_match(m.start()..m.end(), captures_to_dict(caps))?;
}
}
}
// Copy over whatever follows the last handled match.
output.push_str(&self[last_match..]);
Ok(output.into())
}
/// Removes matches of a pattern from one or both sides of the string and
/// returns the resulting string.
///
/// Without a pattern, whitespace is trimmed. The named `at` argument
/// restricts trimming to the start or the end; when it is omitted, both
/// sides are trimmed. The named `repeat` argument (default `true`) selects
/// whether matches are removed repeatedly or at most once per side. It has
/// no effect when no pattern is given.
#[func]
pub fn trim(
&self,
// The pattern to remove; whitespace if omitted.
#[default]
pattern: Option<StrPattern>,
// The side to trim; both if omitted.
#[named]
at: Option<StrSide>,
// Whether to remove matches repeatedly or just once.
#[named]
#[default(true)]
repeat: bool,
) -> Str {
// Which sides to trim. `start` is mutable because the regex branch clears
// it once trimming at the start is finished.
let mut start = matches!(at, Some(StrSide::Start) | None);
let end = matches!(at, Some(StrSide::End) | None);
let trimmed = match pattern {
// No pattern: use the standard library's trimming functions.
None => match at {
None => self.0.trim(),
Some(StrSide::Start) => self.0.trim_start(),
Some(StrSide::End) => self.0.trim_end(),
},
Some(StrPattern::Str(pat)) => {
let pat = pat.as_str();
let mut s = self.as_str();
if repeat {
// Strip every consecutive occurrence on the requested sides.
if start {
s = s.trim_start_matches(pat);
}
if end {
s = s.trim_end_matches(pat);
}
} else {
// Strip at most one occurrence on each requested side.
if start {
s = s.strip_prefix(pat).unwrap_or(s);
}
if end {
s = s.strip_suffix(pat).unwrap_or(s);
}
}
s
}
Some(StrPattern::Regex(re)) => {
let s = self.as_str();
// End offset of the previous match, if any.
let mut last = None;
// The byte range of `s` that is kept.
let mut range = 0..s.len();
for m in re.find_iter(s) {
// Whether this match begins right where the previous one ended.
let consecutive = last == Some(m.start());
// Start-trimming stays active only while matches form an unbroken
// chain from offset zero.
start &= m.start() == 0 || consecutive;
if start {
range.start = m.end();
// Without `repeat`, only the first match is trimmed from the start.
start &= repeat;
}
// Tentatively end the kept range where this match begins. With
// `repeat`, only the first match of a consecutive run moves the end.
if end && (!consecutive || !repeat) {
range.end = m.start();
}
last = Some(m.end());
}
// The final match does not reach the end of the string, so nothing is
// trimmed from the end.
if last.is_some_and(|last| last < s.len()) {
range.end = s.len();
}
// Guard against an inverted range.
&s[range.start..range.start.max(range.end)]
}
};
trimmed.into()
}
/// Splits the string at matches of the specified pattern and returns an
/// array of the resulting parts.
///
/// Without a pattern, the string is split at whitespace.
#[func]
pub fn split(
    &self,
    #[default]
    pattern: Option<StrPattern>,
) -> Array {
    let text = self.as_str();
    let wrap = |piece: &str| Value::Str(piece.into());
    match pattern {
        None => text.split_whitespace().map(wrap).collect(),
        Some(StrPattern::Str(separator)) => {
            text.split(separator.as_str()).map(wrap).collect()
        }
        Some(StrPattern::Regex(regex)) => regex.split(text).map(wrap).collect(),
    }
}
/// Reverses the string, keeping each grapheme cluster intact.
#[func(title = "Reverse")]
pub fn rev(&self) -> Str {
    let reversed = self.as_str().graphemes(true).rev().fold(
        EcoString::with_capacity(self.0.len()),
        |mut acc, cluster| {
            acc.push_str(cluster);
            acc
        },
    );
    reversed.into()
}
}
impl Deref for Str {
type Target = str;
fn deref(&self) -> &str {
&self.0
}
}
/// Debug-formats exactly like the underlying string slice.
impl Debug for Str {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        <str as Debug>::fmt(self, f)
    }
}
/// Displays exactly like the underlying string slice.
impl Display for Str {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        <str as Display>::fmt(self, f)
    }
}
/// Delegates to the `Repr` implementation for `str`.
impl Repr for Str {
    fn repr(&self) -> EcoString {
        self.as_str().repr()
    }
}
/// Delegates to the `Repr` implementation for `str`.
impl Repr for EcoString {
    fn repr(&self) -> EcoString {
        <str as Repr>::repr(self)
    }
}
/// Produces a double-quoted, escaped representation of the string.
impl Repr for str {
    fn repr(&self) -> EcoString {
        // Reserve room for the contents plus the two surrounding quotes.
        let mut out = EcoString::with_capacity(self.len() + 2);
        out.push('"');
        for ch in self.chars() {
            match ch {
                // The null character gets an explicit Unicode escape.
                '\0' => out.push_str(r"\u{0}"),
                // Single quotes are emitted verbatim rather than escaped.
                '\'' => out.push(ch),
                '"' => out.push_str(r#"\""#),
                other => {
                    for escaped in other.escape_debug() {
                        out.push(escaped);
                    }
                }
            }
        }
        out.push('"');
        out
    }
}
/// Concatenates two strings, reusing the left-hand side as the buffer.
impl Add for Str {
    type Output = Self;

    fn add(mut self, rhs: Self) -> Self::Output {
        self.0.push_str(rhs.as_str());
        self
    }
}
/// Appends the right-hand string to this one in place.
impl AddAssign for Str {
    fn add_assign(&mut self, rhs: Self) {
        self.0.push_str(&rhs.0);
    }
}
/// Cheap reference conversion to a string slice.
impl AsRef<str> for Str {
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
/// Borrows the contents as a string slice.
impl Borrow<str> for Str {
    fn borrow(&self) -> &str {
        self.as_str()
    }
}
impl From<char> for Str {
fn from(c: char) -> Self {
Self(c.into())
}
}
impl From<&str> for Str {
fn from(s: &str) -> Self {
Self(s.into())
}
}
impl From<EcoString> for Str {
fn from(s: EcoString) -> Self {
Self(s)
}
}
impl From<String> for Str {
fn from(s: String) -> Self {
Self(s.into())
}
}
impl From<Cow<'_, str>> for Str {
fn from(s: Cow<str>) -> Self {
Self(s.into())
}
}
impl FromIterator<char> for Str {
fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
Self(iter.into_iter().collect())
}
}
impl From<Str> for EcoString {
fn from(str: Str) -> Self {
str.0
}
}
/// Converts into an owned standard library `String`.
impl From<Str> for String {
    fn from(s: Str) -> Self {
        String::from(s.0)
    }
}
// Casts for `char`: a `char` becomes a one-character string value, and a
// string casts back to `char` only if it consists of exactly one codepoint.
cast! {
char,
self => Value::Str(self.into()),
string: Str => {
let mut chars = string.chars();
// Exactly one `Some` followed by `None` means a single codepoint.
match (chars.next(), chars.next()) {
(Some(c), None) => c,
_ => bail!("expected exactly one character"),
}
},
}
// Cast for `&str`: only the direction into a string value is defined here.
cast! {
&str,
self => Value::Str(self.into()),
}
// Casts for `EcoString`: converts to and from string values.
cast! {
EcoString,
self => Value::Str(self.into()),
v: Str => v.into(),
}
// Casts for `PicoStr`: it is resolved to its text when turned into a string
// value and built from the string's contents in the other direction.
cast! {
PicoStr,
self => Value::Str(self.resolve().into()),
v: Str => v.as_str().into(),
}
// Casts for the standard library `String`: converts to and from string
// values.
cast! {
String,
self => Value::Str(self.into()),
v: Str => v.into(),
}
/// A value that can be cast to a string.
pub enum ToStr {
/// A string value ready to be used as is.
Str(Str),
/// An integer about to be formatted in a base.
Int(i64),
}
// Accepted inputs for the string constructor. Integers are kept as integers
// so that the constructor can still apply a base; every other accepted type
// is converted to its string form right here.
cast! {
ToStr,
v: i64 => Self::Int(v),
v: f64 => Self::Str(repr::display_float(v).into()),
v: Decimal => Self::Str(format_str!("{}", v)),
v: Version => Self::Str(format_str!("{}", v)),
// Bytes are only accepted if they are valid UTF-8.
v: Bytes => Self::Str(
std::str::from_utf8(&v)
.map_err(|_| "bytes are not valid utf-8")?
.into()
),
v: Label => Self::Str(v.as_str().into()),
v: Type => Self::Str(v.long_name().into()),
v: Str => Self::Str(v),
}
fn match_to_dict((start, text): (usize, &str)) -> Dict {
dict! {
"start" => start,
"end" => start + text.len(),
"text" => text,
"captures" => Array::new(),
}
}
fn captures_to_dict(cap: regex::Captures) -> Dict {
let m = cap.get(0).expect("missing first match");
dict! {
"start" => m.start(),
"end" => m.end(),
"text" => m.as_str(),
"captures" => cap.iter()
.skip(1)
.map(|opt| opt.map_or(Value::None, |m| m.as_str().into_value()))
.collect::<Array>(),
}
}
/// The error message for an index outside of the string.
#[cold]
fn out_of_bounds(index: i64, len: usize) -> EcoString {
    eco_format!(
        "string index out of bounds (index: {}, len: {})",
        index,
        len
    )
}
/// The error message for an index outside of the string when no default
/// value was supplied.
#[cold]
fn no_default_and_out_of_bounds(index: i64, len: usize) -> EcoString {
    eco_format!(
        "no default value was specified and string index out of bounds (index: {}, len: {})",
        index,
        len
    )
}
/// The error message for an index that falls inside a multi-byte character.
#[cold]
fn not_a_char_boundary(index: i64) -> EcoString {
    eco_format!(
        "string index {} is not a character boundary",
        index
    )
}
#[cold]
fn string_is_empty() -> EcoString {
"string is empty".into()
}
/// A regular expression.
// Newtype around a compiled `regex::Regex`.
#[ty(scope)]
#[derive(Debug, Clone)]
pub struct Regex(regex::Regex);
impl Regex {
    /// Compiles a regular expression from its textual pattern.
    ///
    /// On failure, the error carries the regex engine's message.
    pub fn new(re: &str) -> StrResult<Self> {
        match regex::Regex::new(re) {
            Ok(compiled) => Ok(Self(compiled)),
            Err(err) => Err(eco_format!("{err}")),
        }
    }
}
#[scope]
impl Regex {
#[func(constructor)]
pub fn construct(
regex: Spanned<Str>,
) -> SourceResult<Regex> {
Self::new(®ex.v).at(regex.span)
}
}
impl Deref for Regex {
type Target = regex::Regex;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Represents the regex as a `regex(..)` call around the escaped pattern.
impl Repr for Regex {
    fn repr(&self) -> EcoString {
        let pattern = self.0.as_str().repr();
        eco_format!("regex({})", pattern)
    }
}
/// Two regexes are equal when their pattern strings are equal.
impl PartialEq for Regex {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.0.as_str(), other.0.as_str());
        lhs == rhs
    }
}
/// Hashes the pattern string, consistent with the `PartialEq` impl.
impl Hash for Regex {
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(self.0.as_str(), state);
    }
}
/// A pattern which can be searched for in a string.
#[derive(Debug, Clone)]
pub enum StrPattern {
/// Just a string.
Str(Str),
/// A regular expression.
Regex(Regex),
}
// Casts for `StrPattern`: each variant converts to the value of its payload,
// and both strings and regexes are accepted as patterns.
cast! {
StrPattern,
self => match self {
Self::Str(v) => v.into_value(),
Self::Regex(v) => v.into_value(),
},
v: Str => Self::Str(v),
v: Regex => Self::Regex(v),
}
/// A side of a string.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub enum StrSide {
/// The logical start of the string.
Start,
/// The logical end of the string.
End,
}
// A string side is given as an alignment; only `start` and `end` are
// accepted, any other alignment is rejected with an error.
cast! {
StrSide,
v: Alignment => match v {
Alignment::START => Self::Start,
Alignment::END => Self::End,
_ => bail!("expected either `start` or `end`"),
},
}
/// A replacement for a matched string.
pub enum Replacement {
/// A string that is inserted as is.
Str(Str),
/// A function that is called with the match dictionary and returns the
/// replacement string.
Func(Func),
}
// Casts for `Replacement`: each variant converts to the value of its
// payload, and both strings and functions are accepted as replacements.
cast! {
Replacement,
self => match self {
Self::Str(v) => v.into_value(),
Self::Func(v) => v.into_value(),
},
v: Str => Self::Str(v),
v: Func => Self::Func(v)
}