ayaka_primitive/
text.rs

1//! The text parser.
2
3use crate::*;
4use nom::{
5    branch::alt,
6    bytes::complete::{take_till, take_till1, take_until, take_while, take_while1},
7    character::complete::{char, one_of},
8    combinator::{all_consuming, iterator, map},
9    error::VerboseError,
10    multi::many0,
11    sequence::{delimited, terminated},
12    *,
13};
14use serde::Deserialize;
15
16/// A collection of [`SubText`].
17#[derive(Debug, Clone, Default, PartialEq, Eq)]
18pub struct Text {
19    /// The tag of current character.
20    pub ch_tag: Option<String>,
21    /// The alias of current character.
22    pub ch_alias: Option<String>,
23    /// The texts.
24    pub sub_texts: Vec<SubText>,
25}
26
/// One piece of a line: an escaped character, a run of plain text, or a command.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SubText {
    /// A single escaped special character.
    Char(char),
    /// A run of raw text.
    Str(String),
    /// A TeX-like command: its name followed by its arguments.
    Cmd(String, Vec<SubText>),
}
37
38type Res<I, O> = IResult<I, O, VerboseError<I>>;
39
40fn take_space(i: &str) -> Res<&str, &str> {
41    take_while(|c: char| c.is_whitespace())(i)
42}
43
44fn take_cmd(i: &str) -> Res<&str, &str> {
45    take_while1(|c: char| c.is_ascii_alphabetic())(i)
46}
47
/// Whether `c` ends a run of raw text: a backslash, a brace, or any whitespace.
fn is_str_end(c: char) -> bool {
    matches!(c, '\\' | '{' | '}') || c.is_whitespace()
}
51
52fn parse_arg(i: &str) -> Res<&str, SubText> {
53    let (i, _) = take_space(i)?;
54    let (i, sub_text) = delimited(char('{'), parse_sub_text, char('}'))(i)?;
55    Ok((i, sub_text))
56}
57
58fn parse_escape_command(i: &str) -> Res<&str, SubText> {
59    let (i, cmd) = take_cmd(i)?;
60    let (i, args) = many0(parse_arg)(i)?;
61    Ok((i, SubText::Cmd(cmd.to_string(), args)))
62}
63
64fn parse_escape_char(i: &str) -> Res<&str, SubText> {
65    let (i, c) = one_of("\\{}/")(i)?;
66    Ok((i, SubText::Char(c)))
67}
68
69fn parse_sub_text_escape(i: &str) -> Res<&str, SubText> {
70    let (i, _) = char('\\')(i)?;
71    alt((parse_escape_char, parse_escape_command))(i)
72}
73
74fn parse_sub_text_str(i: &str) -> Res<&str, SubText> {
75    let (i, pre_space) = take_space(i)?;
76    let (i, str) = if pre_space.is_empty() {
77        take_till1(is_str_end)(i)
78    } else {
79        take_till(is_str_end)(i)
80    }?;
81    let (i, post_space) = take_space(i)?;
82    let str = format!(
83        "{}{}{}",
84        if !pre_space.is_empty() { " " } else { "" },
85        str,
86        if !post_space.is_empty() { " " } else { "" }
87    );
88    Ok((i, SubText::Str(str)))
89}
90
91fn parse_sub_text(i: &str) -> Res<&str, SubText> {
92    alt((parse_sub_text_escape, parse_sub_text_str))(i)
93}
94
95fn parse_sub_texts(i: &str) -> Res<&str, Vec<SubText>> {
96    let mut it = iterator(i, parse_sub_text);
97    let sub_texts = it.collect();
98    let (i, ()) = it.finish()?;
99    Ok((i, sub_texts))
100}
101
102fn parse_text_without_ch(i: &str) -> Res<&str, Text> {
103    let (i, sub_texts) = parse_sub_texts(i)?;
104    let text = Text {
105        ch_tag: None,
106        ch_alias: None,
107        sub_texts,
108    };
109    Ok((i, text))
110}
111
112fn parse_text_with_ch(i: &str) -> Res<&str, Text> {
113    let (i, _) = char('/')(i)?;
114    let (i, ch_tag) = map(terminated(take_until("/"), char('/')), str::trim)(i)?;
115    let (i, ch_alias) = map(terminated(take_until("/"), char('/')), str::trim)(i)?;
116    let (i, mut text) = parse_text_without_ch(i)?;
117    if !ch_tag.is_empty() {
118        text.ch_tag = Some(ch_tag.to_string());
119    }
120    if !ch_alias.is_empty() {
121        text.ch_alias = Some(ch_alias.to_string());
122    }
123    Ok((i, text))
124}
125
126fn parse_text(i: &str) -> Res<&str, Text> {
127    all_consuming(alt((parse_text_with_ch, parse_text_without_ch)))(i)
128}
129
130impl<'a> TryFrom<&'a str> for Text {
131    type Error = VerboseError<&'a str>;
132
133    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
134        parse_text(value).finish().map(|(_, text)| text)
135    }
136}
137
138impl<'de> Deserialize<'de> for Text {
139    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
140    where
141        D: serde::Deserializer<'de>,
142    {
143        let raw_value = RawValue::deserialize(deserializer)?;
144        let text = Text::try_from(raw_value.get_str().as_ref())
145            .map_err(<D::Error as serde::de::Error>::custom)?;
146        Ok(text)
147    }
148}
149
/// Tests for the text parser, plus small constructors for expected values.
#[cfg(test)]
pub mod test {
    use super::{SubText, Text};

    /// Parse `s`, panicking if it is not valid text.
    pub fn parse(s: &str) -> Text {
        Text::try_from(s).unwrap()
    }

    /// Build a [`Text`] with no character tag or alias.
    pub fn text(sub_texts: Vec<SubText>) -> Text {
        Text {
            sub_texts,
            ..Default::default()
        }
    }

    /// Build a [`Text`] with the given character tag and alias.
    pub fn text_ch(tag: Option<&str>, alias: Option<&str>, sub_texts: Vec<SubText>) -> Text {
        Text {
            ch_tag: tag.map(String::from),
            ch_alias: alias.map(String::from),
            sub_texts,
        }
    }

    /// Shorthand for [`SubText::Char`].
    pub fn char(c: char) -> SubText {
        SubText::Char(c)
    }

    /// Shorthand for [`SubText::Str`].
    pub fn str(s: impl Into<String>) -> SubText {
        SubText::Str(s.into())
    }

    /// Shorthand for [`SubText::Cmd`].
    pub fn cmd(cmd: impl Into<String>, args: Vec<SubText>) -> SubText {
        SubText::Cmd(cmd.into(), args)
    }

    #[test]
    fn basic() {
        let cases = [("\\\\", '\\'), ("\\{", '{')];
        for &(input, expected) in cases.iter() {
            assert_eq!(parse(input), text(vec![char(expected)]));
        }
    }

    #[test]
    fn space() {
        let two_commands = text(vec![
            cmd("cmd", vec![str("123")]),
            str(" "),
            cmd("cmd", vec![str("123")]),
        ]);
        assert_eq!(parse("\\cmd{123} \\cmd{123}"), two_commands);

        let bare_then_command = text(vec![
            cmd("par", vec![]),
            str(" "),
            cmd("cmd", vec![str("123")]),
        ]);
        assert_eq!(parse("\\par \\cmd{123}"), bare_then_command);
    }

    #[test]
    fn embedded() {
        let inner = cmd("exec", vec![str("114514")]);
        let outer = cmd("switch", vec![inner]);
        assert_eq!(parse(r"\switch{\exec{114514}}"), text(vec![outer]));
    }

    #[test]
    fn lf() {
        let cases = [
            (" ", " "),
            ("  ", " "),
            (" \n ", " "),
            (" 123 ", " 123 "),
            (" \n123\t ", " 123 "),
            ("123", "123"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(parse(input), text(vec![str(expected)]));
        }
    }

    #[test]
    fn character() {
        assert_eq!(parse("/ch//"), text_ch(Some("ch"), None, vec![]));
        assert_eq!(
            parse("/ch/alias/"),
            text_ch(Some("ch"), Some("alias"), vec![])
        );
        assert_eq!(parse("/ / /"), text_ch(None, None, vec![]));
    }
}
239}