1use std::borrow::{Borrow, Cow};
2use std::fmt::{self, Debug, Display, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::{Add, AddAssign, Deref, Range};
5
6use comemo::Tracked;
7use ecow::EcoString;
8use serde::{Deserialize, Serialize};
9use typst_syntax::{Span, Spanned};
10use unicode_segmentation::UnicodeSegmentation;
11
12use crate::diag::{bail, At, SourceResult, StrResult};
13use crate::engine::Engine;
14use crate::foundations::{
15 cast, dict, func, repr, scope, ty, Array, Bytes, Context, Decimal, Dict, Func,
16 IntoValue, Label, Repr, Type, Value, Version,
17};
18use crate::layout::Alignment;
19
/// Creates a new `Str` from a format string.
///
/// All tokens are forwarded unchanged to `eco_format!`; the formatted result
/// is then converted with `Str::from`.
#[macro_export]
#[doc(hidden)]
macro_rules! __format_str {
    ($($tts:tt)*) => {{
        $crate::foundations::Str::from($crate::foundations::eco_format!($($tts)*))
    }};
}
28
29#[doc(hidden)]
30pub use ecow::eco_format;
31
32#[doc(inline)]
33pub use crate::__format_str as format_str;
34
/// A string value: a thin newtype around an `EcoString`.
///
/// The type is registered through the `ty` attribute under the title
/// "String". Equality, ordering and hashing are derived and therefore defer to
/// the wrapped `EcoString`. Because of `#[serde(transparent)]`, the value is
/// (de)serialized exactly like its inner string.
#[ty(scope, cast, title = "String")]
#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[derive(Serialize, Deserialize)]
#[serde(transparent)]
pub struct Str(EcoString);
76
77impl Str {
78 pub fn new() -> Self {
80 Self(EcoString::new())
81 }
82
83 pub fn is_empty(&self) -> bool {
85 self.0.is_empty()
86 }
87
88 pub fn repeat(&self, n: usize) -> StrResult<Self> {
90 if self.0.len().checked_mul(n).is_none() {
91 return Err(eco_format!("cannot repeat this string {n} times"));
92 }
93 Ok(Self(self.0.repeat(n)))
94 }
95
96 pub fn as_str(&self) -> &str {
98 self
99 }
100
101 fn locate(&self, index: i64) -> StrResult<usize> {
103 self.locate_opt(index)?
104 .ok_or_else(|| out_of_bounds(index, self.len()))
105 }
106
107 fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> {
111 let wrapped =
112 if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) };
113
114 let resolved = wrapped
115 .and_then(|v| usize::try_from(v).ok())
116 .filter(|&v| v <= self.0.len());
117
118 if resolved.is_some_and(|i| !self.0.is_char_boundary(i)) {
119 return Err(not_a_char_boundary(index));
120 }
121
122 Ok(resolved)
123 }
124}
125
126#[scope]
127impl Str {
128 #[func(constructor)]
150 pub fn construct(
151 value: ToStr,
153 #[named]
155 #[default(Spanned::new(10, Span::detached()))]
156 base: Spanned<i64>,
157 ) -> SourceResult<Str> {
158 Ok(match value {
159 ToStr::Str(s) => {
160 if base.v != 10 {
161 bail!(base.span, "base is only supported for integers");
162 }
163 s
164 }
165 ToStr::Int(n) => {
166 if base.v < 2 || base.v > 36 {
167 bail!(base.span, "base must be between 2 and 36");
168 }
169 repr::format_int_with_base(n, base.v).into()
170 }
171 })
172 }
173
174 #[func(title = "Length")]
176 pub fn len(&self) -> usize {
177 self.0.len()
178 }
179
180 #[func]
183 pub fn first(&self) -> StrResult<Str> {
184 self.0
185 .graphemes(true)
186 .next()
187 .map(Into::into)
188 .ok_or_else(string_is_empty)
189 }
190
191 #[func]
194 pub fn last(&self) -> StrResult<Str> {
195 self.0
196 .graphemes(true)
197 .next_back()
198 .map(Into::into)
199 .ok_or_else(string_is_empty)
200 }
201
202 #[func]
206 pub fn at(
207 &self,
208 index: i64,
210 #[named]
212 default: Option<Value>,
213 ) -> StrResult<Value> {
214 let len = self.len();
215 self.locate_opt(index)?
216 .and_then(|i| self.0[i..].graphemes(true).next().map(|s| s.into_value()))
217 .or(default)
218 .ok_or_else(|| no_default_and_out_of_bounds(index, len))
219 }
220
221 #[func]
224 pub fn slice(
225 &self,
226 start: i64,
229 #[default]
233 end: Option<i64>,
234 #[named]
237 count: Option<i64>,
238 ) -> StrResult<Str> {
239 let end = end.or(count.map(|c| start + c)).unwrap_or(self.len() as i64);
240 let start = self.locate(start)?;
241 let end = self.locate(end)?.max(start);
242 Ok(self.0[start..end].into())
243 }
244
245 #[func]
247 pub fn clusters(&self) -> Array {
248 self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
249 }
250
251 #[func]
253 pub fn codepoints(&self) -> Array {
254 self.chars().map(|c| Value::Str(c.into())).collect()
255 }
256
257 #[func]
266 pub fn to_unicode(
267 character: char,
269 ) -> u32 {
270 character as u32
271 }
272
273 #[func]
279 pub fn from_unicode(
280 value: u32,
282 ) -> StrResult<Str> {
283 let c: char = value
284 .try_into()
285 .map_err(|_| eco_format!("{value:#x} is not a valid codepoint"))?;
286 Ok(c.into())
287 }
288
289 #[func]
294 pub fn contains(
295 &self,
296 pattern: StrPattern,
298 ) -> bool {
299 match pattern {
300 StrPattern::Str(pat) => self.0.contains(pat.as_str()),
301 StrPattern::Regex(re) => re.is_match(self),
302 }
303 }
304
305 #[func]
307 pub fn starts_with(
308 &self,
309 pattern: StrPattern,
311 ) -> bool {
312 match pattern {
313 StrPattern::Str(pat) => self.0.starts_with(pat.as_str()),
314 StrPattern::Regex(re) => re.find(self).is_some_and(|m| m.start() == 0),
315 }
316 }
317
318 #[func]
320 pub fn ends_with(
321 &self,
322 pattern: StrPattern,
324 ) -> bool {
325 match pattern {
326 StrPattern::Str(pat) => self.0.ends_with(pat.as_str()),
327 StrPattern::Regex(re) => {
328 let mut start_byte = 0;
329 while let Some(mat) = re.find_at(self, start_byte) {
330 if mat.end() == self.0.len() {
331 return true;
332 }
333
334 let Some(c) = self[mat.start()..].chars().next() else { break };
337 start_byte = mat.start() + c.len_utf8();
338 }
339 false
340 }
341 }
342 }
343
344 #[func]
347 pub fn find(
348 &self,
349 pattern: StrPattern,
351 ) -> Option<Str> {
352 match pattern {
353 StrPattern::Str(pat) => self.0.contains(pat.as_str()).then_some(pat),
354 StrPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()),
355 }
356 }
357
358 #[func]
361 pub fn position(
362 &self,
363 pattern: StrPattern,
365 ) -> Option<usize> {
366 match pattern {
367 StrPattern::Str(pat) => self.0.find(pat.as_str()),
368 StrPattern::Regex(re) => re.find(self).map(|m| m.start()),
369 }
370 }
371
372 #[func]
385 pub fn match_(
386 &self,
387 pattern: StrPattern,
389 ) -> Option<Dict> {
390 match pattern {
391 StrPattern::Str(pat) => {
392 self.0.match_indices(pat.as_str()).next().map(match_to_dict)
393 }
394 StrPattern::Regex(re) => re.captures(self).map(captures_to_dict),
395 }
396 }
397
398 #[func]
402 pub fn matches(
403 &self,
404 pattern: StrPattern,
406 ) -> Array {
407 match pattern {
408 StrPattern::Str(pat) => self
409 .0
410 .match_indices(pat.as_str())
411 .map(match_to_dict)
412 .map(Value::Dict)
413 .collect(),
414 StrPattern::Regex(re) => re
415 .captures_iter(self)
416 .map(captures_to_dict)
417 .map(Value::Dict)
418 .collect(),
419 }
420 }
421
422 #[func]
426 pub fn replace(
427 &self,
428 engine: &mut Engine,
429 context: Tracked<Context>,
430 pattern: StrPattern,
432 replacement: Replacement,
436 #[named]
438 count: Option<usize>,
439 ) -> SourceResult<Str> {
440 let mut output = EcoString::with_capacity(self.as_str().len());
443
444 let mut last_match = 0;
446 let mut handle_match = |range: Range<usize>, dict: Dict| -> SourceResult<()> {
447 output.push_str(&self[last_match..range.start]);
449 last_match = range.end;
450
451 match &replacement {
453 Replacement::Str(s) => output.push_str(s),
454 Replacement::Func(func) => {
455 let piece = func
456 .call(engine, context, [dict])?
457 .cast::<Str>()
458 .at(func.span())?;
459 output.push_str(&piece);
460 }
461 }
462
463 Ok(())
464 };
465
466 let count = count.unwrap_or(usize::MAX);
468 match &pattern {
469 StrPattern::Str(pat) => {
470 for m in self.match_indices(pat.as_str()).take(count) {
471 let (start, text) = m;
472 handle_match(start..start + text.len(), match_to_dict(m))?;
473 }
474 }
475 StrPattern::Regex(re) => {
476 for caps in re.captures_iter(self).take(count) {
477 let m = caps.get(0).unwrap();
479 handle_match(m.start()..m.end(), captures_to_dict(caps))?;
480 }
481 }
482 }
483
484 output.push_str(&self[last_match..]);
486 Ok(output.into())
487 }
488
/// Removes matches of a pattern from one or both sides of the string and
/// returns the remainder.
///
/// Without a pattern, whitespace is trimmed via `str::trim`,
/// `str::trim_start` or `str::trim_end`; `repeat` is not consulted in that
/// case. With a pattern, `repeat` decides whether matches are removed
/// repeatedly or only once per side.
#[func]
pub fn trim(
    &self,
    // The pattern to remove; `None` means whitespace.
    #[default]
    pattern: Option<StrPattern>,
    // The side to trim; `None` means both sides.
    #[named]
    at: Option<StrSide>,
    // Whether to strip repeated matches or only a single one.
    #[named]
    #[default(true)]
    repeat: bool,
) -> Str {
    // Which sides are (still) being trimmed. `start` is mutable because the
    // regex branch switches it off once start-trimming must stop.
    let mut start = matches!(at, Some(StrSide::Start) | None);
    let end = matches!(at, Some(StrSide::End) | None);

    let trimmed = match pattern {
        None => match at {
            None => self.0.trim(),
            Some(StrSide::Start) => self.0.trim_start(),
            Some(StrSide::End) => self.0.trim_end(),
        },
        Some(StrPattern::Str(pat)) => {
            let pat = pat.as_str();
            let mut s = self.as_str();
            if repeat {
                if start {
                    s = s.trim_start_matches(pat);
                }
                if end {
                    s = s.trim_end_matches(pat);
                }
            } else {
                // Strip at most one occurrence per side.
                if start {
                    s = s.strip_prefix(pat).unwrap_or(s);
                }
                if end {
                    s = s.strip_suffix(pat).unwrap_or(s);
                }
            }
            s
        }
        Some(StrPattern::Regex(re)) => {
            let s = self.as_str();
            // End offset of the previous match, if any.
            let mut last = None;
            // Byte range of `s` that survives trimming.
            let mut range = 0..s.len();

            for m in re.find_iter(s) {
                // Does this match begin exactly where the previous one ended?
                let consecutive = last == Some(m.start());

                // Keep trimming from the start only while matches form an
                // unbroken run beginning at offset zero.
                start &= m.start() == 0 || consecutive;
                if start {
                    range.start = m.end();
                    // Without `repeat`, stop after the first match.
                    start &= repeat;
                }

                // Candidate end position: the start of a new run of matches
                // (or of every match when not repeating, so the last one
                // seen wins).
                if end && (!consecutive || !repeat) {
                    range.end = m.start();
                }

                last = Some(m.end());
            }

            // If the final match does not reach the end of the string, there
            // is nothing to trim at the end: undo the candidate.
            if last.is_some_and(|last| last < s.len()) {
                range.end = s.len();
            }

            // Guard against an end that lies before the start.
            &s[range.start..range.start.max(range.end)]
        }
    };

    trimmed.into()
}
573
574 #[func]
583 pub fn split(
584 &self,
585 #[default]
587 pattern: Option<StrPattern>,
588 ) -> Array {
589 let s = self.as_str();
590 match pattern {
591 None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(),
592 Some(StrPattern::Str(pat)) => {
593 s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect()
594 }
595 Some(StrPattern::Regex(re)) => {
596 re.split(s).map(|v| Value::Str(v.into())).collect()
597 }
598 }
599 }
600
601 #[func(title = "Reverse")]
603 pub fn rev(&self) -> Str {
604 let mut s = EcoString::with_capacity(self.0.len());
605 for grapheme in self.as_str().graphemes(true).rev() {
606 s.push_str(grapheme);
607 }
608 s.into()
609 }
610}
611
612impl Deref for Str {
613 type Target = str;
614
615 fn deref(&self) -> &str {
616 &self.0
617 }
618}
619
620impl Debug for Str {
621 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
622 Debug::fmt(self.as_str(), f)
623 }
624}
625
626impl Display for Str {
627 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
628 Display::fmt(self.as_str(), f)
629 }
630}
631
632impl Repr for Str {
633 fn repr(&self) -> EcoString {
634 self.as_ref().repr()
635 }
636}
637
638impl Repr for EcoString {
639 fn repr(&self) -> EcoString {
640 self.as_ref().repr()
641 }
642}
643
644impl Repr for str {
645 fn repr(&self) -> EcoString {
646 let mut r = EcoString::with_capacity(self.len() + 2);
647 r.push('"');
648 for c in self.chars() {
649 match c {
650 '\0' => r.push_str(r"\u{0}"),
651 '\'' => r.push('\''),
652 '"' => r.push_str(r#"\""#),
653 _ => r.extend(c.escape_debug()),
654 }
655 }
656 r.push('"');
657 r
658 }
659}
660
661impl Repr for char {
662 fn repr(&self) -> EcoString {
663 EcoString::from(*self).repr()
664 }
665}
666
667impl Add for Str {
668 type Output = Self;
669
670 fn add(mut self, rhs: Self) -> Self::Output {
671 self += rhs;
672 self
673 }
674}
675
676impl AddAssign for Str {
677 fn add_assign(&mut self, rhs: Self) {
678 self.0.push_str(rhs.as_str());
679 }
680}
681
682impl AsRef<str> for Str {
683 fn as_ref(&self) -> &str {
684 self
685 }
686}
687
688impl Borrow<str> for Str {
689 fn borrow(&self) -> &str {
690 self
691 }
692}
693
694impl From<char> for Str {
695 fn from(c: char) -> Self {
696 Self(c.into())
697 }
698}
699
700impl From<&str> for Str {
701 fn from(s: &str) -> Self {
702 Self(s.into())
703 }
704}
705
706impl From<EcoString> for Str {
707 fn from(s: EcoString) -> Self {
708 Self(s)
709 }
710}
711
712impl From<String> for Str {
713 fn from(s: String) -> Self {
714 Self(s.into())
715 }
716}
717
718impl From<Cow<'_, str>> for Str {
719 fn from(s: Cow<str>) -> Self {
720 Self(s.into())
721 }
722}
723
724impl FromIterator<char> for Str {
725 fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
726 Self(iter.into_iter().collect())
727 }
728}
729
730impl From<Str> for EcoString {
731 fn from(str: Str) -> Self {
732 str.0
733 }
734}
735
736impl From<Str> for String {
737 fn from(s: Str) -> Self {
738 s.0.into()
739 }
740}
741
// Casting rules for `char`.
cast! {
    char,
    // A character turns into a string value.
    self => Value::Str(self.into()),
    // A string casts to a character only if it holds exactly one `char`.
    string: Str => {
        let mut chars = string.chars();
        match (chars.next(), chars.next()) {
            (Some(c), None) => c,
            _ => bail!("expected exactly one character"),
        }
    },
}
753
// Casting rule for `&str`: only the direction into a string value is defined.
cast! {
    &str,
    self => Value::Str(self.into()),
}
758
// Casting rules for `EcoString`: converts to a string value and back from a
// `Str`.
cast! {
    EcoString,
    self => Value::Str(self.into()),
    v: Str => v.into(),
}
764
// Casting rules for `String`: converts to a string value and back from a
// `Str`.
cast! {
    String,
    self => Value::Str(self.into()),
    v: Str => v.into(),
}
770
/// A value that can be converted into a string by the `Str` constructor.
pub enum ToStr {
    /// A string that is used as-is.
    Str(Str),
    /// An integer that still has to be formatted in the requested base.
    Int(i64),
}
778
// Casting rules for `ToStr`. Integers are kept as numbers so that the
// constructor can apply a base; every other accepted type is converted to a
// string right here.
cast! {
    ToStr,
    v: i64 => Self::Int(v),
    v: f64 => Self::Str(repr::display_float(v).into()),
    v: Decimal => Self::Str(format_str!("{}", v)),
    v: Version => Self::Str(format_str!("{}", v)),
    // Bytes are only accepted if they are valid UTF-8.
    v: Bytes => Self::Str(v.to_str().map_err(|_| "bytes are not valid utf-8")?),
    v: Label => Self::Str(v.resolve().as_str().into()),
    v: Type => Self::Str(v.long_name().into()),
    v: Str => Self::Str(v),
}
790
791fn match_to_dict((start, text): (usize, &str)) -> Dict {
793 dict! {
794 "start" => start,
795 "end" => start + text.len(),
796 "text" => text,
797 "captures" => Array::new(),
798 }
799}
800
801fn captures_to_dict(cap: regex::Captures) -> Dict {
803 let m = cap.get(0).expect("missing first match");
804 dict! {
805 "start" => m.start(),
806 "end" => m.end(),
807 "text" => m.as_str(),
808 "captures" => cap.iter()
809 .skip(1)
810 .map(|opt| opt.map_or(Value::None, |m| m.as_str().into_value()))
811 .collect::<Array>(),
812 }
813}
814
815#[cold]
817fn out_of_bounds(index: i64, len: usize) -> EcoString {
818 eco_format!("string index out of bounds (index: {}, len: {})", index, len)
819}
820
821#[cold]
823fn no_default_and_out_of_bounds(index: i64, len: usize) -> EcoString {
824 eco_format!("no default value was specified and string index out of bounds (index: {}, len: {})", index, len)
825}
826
827#[cold]
829fn not_a_char_boundary(index: i64) -> EcoString {
830 eco_format!("string index {} is not a character boundary", index)
831}
832
833#[cold]
835fn string_is_empty() -> EcoString {
836 "string is empty".into()
837}
838
/// A regular expression: a newtype around a compiled `regex::Regex`.
///
/// Registered as a type through the `ty` attribute.
#[ty(scope)]
#[derive(Debug, Clone)]
pub struct Regex(regex::Regex);
860
861impl Regex {
862 pub fn new(re: &str) -> StrResult<Self> {
864 regex::Regex::new(re).map(Self).map_err(|err| eco_format!("{err}"))
865 }
866}
867
868#[scope]
869impl Regex {
870 #[func(constructor)]
872 pub fn construct(
873 regex: Spanned<Str>,
884 ) -> SourceResult<Regex> {
885 Self::new(®ex.v).at(regex.span)
886 }
887}
888
889impl Deref for Regex {
890 type Target = regex::Regex;
891
892 fn deref(&self) -> &Self::Target {
893 &self.0
894 }
895}
896
897impl Repr for Regex {
898 fn repr(&self) -> EcoString {
899 eco_format!("regex({})", self.0.as_str().repr())
900 }
901}
902
903impl PartialEq for Regex {
904 fn eq(&self, other: &Self) -> bool {
905 self.0.as_str() == other.0.as_str()
906 }
907}
908
909impl Hash for Regex {
910 fn hash<H: Hasher>(&self, state: &mut H) {
911 self.0.as_str().hash(state);
912 }
913}
914
/// A pattern that can be searched for in a string.
#[derive(Debug, Clone)]
pub enum StrPattern {
    /// A literal string that is matched exactly.
    Str(Str),
    /// A regular expression.
    Regex(Regex),
}
923
// Casting rules for `StrPattern`: converts to and from both strings and
// regexes.
cast! {
    StrPattern,
    self => match self {
        Self::Str(v) => v.into_value(),
        Self::Regex(v) => v.into_value(),
    },
    v: Str => Self::Str(v),
    v: Regex => Self::Regex(v),
}
933
/// A side of a string, as selected by the `at` argument of `trim`.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub enum StrSide {
    /// The beginning of the string.
    Start,
    /// The end of the string.
    End,
}
943
// Casting rule for `StrSide`: a side is written as an alignment value, of
// which only `start` and `end` are accepted.
cast! {
    StrSide,
    v: Alignment => match v {
        Alignment::START => Self::Start,
        Alignment::END => Self::End,
        _ => bail!("expected either `start` or `end`"),
    },
}
952
/// What a match is replaced with in `Str::replace`.
pub enum Replacement {
    /// A fixed string inserted for every match.
    Str(Str),
    /// A function that is called with the match dictionary and must return
    /// the replacement string.
    Func(Func),
}
961
// Casting rules for `Replacement`: converts to and from both strings and
// functions.
cast! {
    Replacement,
    self => match self {
        Self::Str(v) => v.into_value(),
        Self::Func(v) => v.into_value(),
    },
    v: Str => Self::Str(v),
    v: Func => Self::Func(v)
}