asciidoc_parser/document/
author.rs

1use std::sync::LazyLock;
2
3use regex::Regex;
4
5use crate::{Parser, Span, content::Content};
6
/// Represents a single author as (typically) described on the [author line].
///
/// The attributes `firstname`, `middlename`, `lastname`, and `authorinitials`
/// are automatically derived from the full value of the author string. When
/// assigned implicitly via the author line, the value includes all of the
/// characters and words prior to the semicolon (`;`), angle bracket (`<`), or
/// the end of the line. Note that when using the implicit author line, the full
/// name can have a maximum of three space-separated names. If it has more, then
/// the full name is assigned to the `firstname` attribute. You can adjoin names
/// using an underscore (`_`) character.
///
/// [author line]: https://docs.asciidoctor.org/asciidoc/latest/document/author-line/
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Author {
    /// Full name as exposed by `name()`; underscores have been replaced with
    /// spaces.
    name: String,
    /// First name part. Holds the same value as `name` whenever the input
    /// could not be split into separate name parts.
    firstname: String,
    /// Second name part; only populated when three name parts were parsed.
    middlename: Option<String>,
    /// Final name part when two or three name parts were parsed.
    lastname: Option<String>,
    /// Text found between the angle brackets (`< >`), if any was parsed.
    email: Option<String>,
}
27
28impl Author {
29    pub(crate) fn parse(source: &str, parser: &Parser) -> Option<Self> {
30        let source = source.trim();
31        if source.is_empty() {
32            return None;
33        }
34
35        // Parse the raw input first to extract components, then apply attribute
36        // substitution to individual components afterwards. Special case: If the entire
37        // input is a single attribute reference, treat the expanded result as a single
38        // name.
39        let is_single_attribute = source.trim().starts_with('{')
40            && source.trim().ends_with('}')
41            && source.matches('{').count() == 1;
42
43        if is_single_attribute {
44            // Entire input is a single attribute reference: Expand and treat as single
45            // name.
46            let expanded_source = apply_author_subs(source, parser);
47
48            let name_with_spaces = replace_underscores_with_spaces(expanded_source);
49            Some(Self {
50                name: name_with_spaces.clone(),
51                firstname: name_with_spaces,
52                middlename: None,
53                lastname: None,
54                email: None,
55            })
56        } else if let Some(captures) = AUTHOR.captures(source) {
57            // Raw input matches author pattern: Extract components then apply
58            // substitutions.
59            let name_without_email = source.split_once('<').unwrap_or((source, "")).0.trim();
60            let name = replace_underscores_with_spaces(name_without_email.to_string());
61
62            // Extract raw components first.
63            let firstname =
64                replace_underscores_with_spaces(apply_author_subs(&captures[1], parser));
65            let mut middlename = captures
66                .get(2)
67                .map(|m| replace_underscores_with_spaces(apply_author_subs(m.as_str(), parser)));
68            let mut lastname = captures
69                .get(3)
70                .map(|m| replace_underscores_with_spaces(apply_author_subs(m.as_str(), parser)));
71            let email = captures
72                .get(4)
73                .map(|m| apply_author_subs(m.as_str(), parser));
74
75            if middlename.is_some() && lastname.is_none() {
76                lastname = middlename;
77                middlename = None;
78            }
79
80            Some(Self {
81                name,
82                firstname,
83                middlename,
84                lastname,
85                email,
86            })
87        } else if source.contains('{') {
88            // Input contains attributes that prevent regex match: Expand first, then try
89            // parsing.
90            let expanded_source = apply_author_subs(source, parser);
91
92            if let Some(captures) = AUTHOR.captures(&expanded_source) {
93                // After expansion, it matches the pattern: Parse normally.
94                let name_without_email = expanded_source
95                    .split_once('<')
96                    .unwrap_or((&expanded_source, ""))
97                    .0
98                    .trim();
99                let name = replace_underscores_with_spaces(name_without_email.to_string());
100
101                let firstname = replace_underscores_with_spaces(captures[1].to_string());
102                let mut middlename = captures
103                    .get(2)
104                    .map(|m| replace_underscores_with_spaces(m.as_str().to_string()));
105                let mut lastname = captures
106                    .get(3)
107                    .map(|m| replace_underscores_with_spaces(m.as_str().to_string()));
108                let email = captures.get(4).map(|m| m.as_str().to_string());
109
110                if middlename.is_some() && lastname.is_none() {
111                    lastname = middlename;
112                    middlename = None;
113                }
114
115                Some(Self {
116                    name,
117                    firstname,
118                    middlename,
119                    lastname,
120                    email,
121                })
122            } else {
123                // Even after expansion, doesn't match: Treat as single name with HTML encoding.
124                let mut expanded_name = expanded_source;
125
126                if expanded_name.contains('<') && expanded_name.contains('>') {
127                    let span = crate::Span::new(&expanded_name);
128                    let mut content = crate::content::Content::from(span);
129                    crate::content::SubstitutionStep::SpecialCharacters.apply(
130                        &mut content,
131                        parser,
132                        None,
133                    );
134                    expanded_name = content.rendered().to_string();
135                }
136
137                let name_with_spaces = replace_underscores_with_spaces(expanded_name);
138                Some(Self {
139                    name: name_with_spaces.clone(),
140                    firstname: name_with_spaces,
141                    middlename: None,
142                    lastname: None,
143                    email: None,
144                })
145            }
146        } else {
147            // Input doesn't contain attributes and doesn't match pattern: Treat as single
148            // name.
149            let mut name = source.to_string();
150
151            // Apply HTML encoding for unparseable patterns that contain angle brackets.
152            if name.contains('<') && name.contains('>') {
153                let span = crate::Span::new(&name);
154                let mut content = crate::content::Content::from(span);
155                crate::content::SubstitutionStep::SpecialCharacters.apply(
156                    &mut content,
157                    parser,
158                    None,
159                );
160                name = content.rendered().to_string();
161            }
162
163            let name_with_spaces = replace_underscores_with_spaces(name);
164            Some(Self {
165                name: name_with_spaces.clone(),
166                firstname: name_with_spaces,
167                middlename: None,
168                lastname: None,
169                email: None,
170            })
171        }
172    }
173
174    /// Returns the full name of the author.
175    ///
176    /// The name includes the entire author declaration except for email.
177    pub fn name(&self) -> &str {
178        &self.name
179    }
180
181    /// Returns the first, forename, or given name of the author.
182    ///
183    /// The first space-separated name in the value of the `author` attribute is
184    /// automatically assigned to `firstname`.
185    pub fn firstname(&self) -> &str {
186        &self.firstname
187    }
188
189    /// Returns the middle name or initial of the author.
190    ///
191    /// If author contains three space-separated names, the second name is
192    /// assigned to the `middlename` attribute.
193    pub fn middlename(&self) -> Option<&str> {
194        self.middlename.as_deref()
195    }
196
197    /// Returns the last, surname, or family name of the author.
198    ///
199    /// If the author name contains two or three space-separated names, the last
200    /// of those names is assigned to the `lastname` attribute.
201    pub fn lastname(&self) -> Option<&str> {
202        self.lastname.as_deref()
203    }
204
205    /// Returns the email address or URL associated with the author.
206    ///
207    /// When assigned via the author line, it’s enclosed in a pair of angle
208    /// brackets (`< >`). A URL can be used in place of the email address.
209    pub fn email(&self) -> Option<&str> {
210        self.email.as_deref()
211    }
212
213    /// Returns the initials of the author.
214    ///
215    /// The first character of the `firstname`, `middlename`, and `lastname`
216    /// attribute values are assigned to the `authorinitials` attribute. The
217    /// value of the `authorinitials` attribute will consist of three characters
218    /// or less depending on how many parts are in the author’s name.
219    pub fn initials(&self) -> String {
220        format!(
221            "{first}{middle}{last}",
222            first = first_char_or_empty_string(&self.firstname),
223            middle = opt_first_char_or_empty_string(self.middlename.as_deref()),
224            last = opt_first_char_or_empty_string(self.lastname.as_deref()),
225        )
226    }
227}
228
/// Returns the first character of `s` as an owned string, or an empty string
/// when `s` has no characters.
fn first_char_or_empty_string(s: &str) -> String {
    match s.chars().next() {
        Some(initial) => initial.to_string(),
        None => String::new(),
    }
}
232
233fn opt_first_char_or_empty_string(s: Option<&str>) -> String {
234    s.map(first_char_or_empty_string).unwrap_or_default()
235}
236
/// Returns `name` with every underscore (`_`) turned into a space.
fn replace_underscores_with_spaces(name: String) -> String {
    name.chars()
        .map(|ch| if ch == '_' { ' ' } else { ch })
        .collect()
}
241
/// Pattern for an author declaration: one to three name parts separated by
/// spaces, optionally followed by an address enclosed in angle brackets. The
/// pattern is anchored at both ends, so the whole input has to match.
///
/// Capture groups: 1 is the first name part (always present on a match), 2
/// and 3 are the optional second and third name parts, and 4 is the text
/// between `<` and `>`.
static AUTHOR: LazyLock<Regex> = LazyLock::new(|| {
    // The pattern is a fixed literal, so a compilation failure here would be a
    // programming error rather than a runtime condition.
    #[allow(clippy::unwrap_used)]
    Regex::new(
        r#"(?x)
            ^

            # Group 1: First name (required)
            ([a-zA-Z0-9_\p{L}\p{N}&\#;][a-zA-Z0-9_\p{L}\p{N}\-'.&\#;]*)

            # Group 2: Middle name (optional)
            (?:\ +([a-zA-Z0-9_\p{L}\p{N}&\#;][a-zA-Z0-9_\p{L}\p{N}\-'.&\#;]*))?

            # Group 3: Last name (optional)
            (?:\ +([a-zA-Z0-9_\p{L}\p{N}&\#;][a-zA-Z0-9_\p{L}\p{N}\-'.&\#;]*))?

            # Group 4: Email address (optional)
            (?:\ +<([^>]+)>)?

            $
        "#,
    )
    .unwrap()
});
265
266fn apply_author_subs(source: &str, parser: &Parser) -> String {
267    let span = Span::new(source);
268    let mut content = Content::from(span);
269
270    use crate::content::SubstitutionStep;
271
272    // Apply attribute references first.
273    SubstitutionStep::AttributeReferences.apply(&mut content, parser, None);
274
275    // Apply HTML encoding:
276    // - Single attribute reference (like {full-author}): No HTML encoding.
277    // - Single attribute in email position (like <{email}>): No HTML encoding.
278    // - Multiple attributes or complex patterns: HTML encoding.
279    // - Don't HTML encode if the content only has pre-existing HTML entities.
280    let is_simple_single_attribute = source.trim().starts_with('{')
281        && source.trim().ends_with('}')
282        && source.matches('{').count() == 1;
283
284    let has_multiple_attributes = source.matches('{').count() > 1;
285
286    // Check if we should apply HTML encoding.
287    let rendered = content.rendered();
288    let has_angle_brackets = rendered.contains('<') && rendered.contains('>');
289    let has_unencoded_ampersand = rendered.contains('&') && !rendered.contains("&amp;");
290
291    if !is_simple_single_attribute
292        && has_multiple_attributes
293        && (has_angle_brackets || has_unencoded_ampersand)
294    {
295        SubstitutionStep::SpecialCharacters.apply(&mut content, parser, None);
296    }
297
298    content.rendered().to_string()
299}