asciidoc_parser/parser/
parser.rs

1use std::collections::HashMap;
2
3use crate::{
4    Document, HasSpan,
5    document::{Attribute, InterpretedValue},
6    parser::{
7        AllowableValue, AttributeValue, HtmlSubstitutionRenderer, InlineSubstitutionRenderer,
8        ModificationContext, PathResolver,
9    },
10    warnings::{Warning, WarningType},
11};
12
13/// The [`Parser`] struct and its related structs allow a caller to configure
14/// how AsciiDoc parsing occurs and then to initiate the parsing process.
15#[derive(Clone, Debug)]
16pub struct Parser<'p> {
17    /// Attribute values at current state of parsing.
18    pub(crate) attribute_values: HashMap<String, AttributeValue>,
19
20    /// Default values for attributes if "set."
21    default_attribute_values: HashMap<String, String>,
22
23    /// Specifies how the basic raw text of a simple block will be converted to
24    /// the format which will ultimately be presented in the final output.
25    ///
26    /// Typically this is an [`HtmlSubstitutionRenderer`] but clients may
27    /// provide alternative implementations.
28    pub(crate) renderer: &'p dyn InlineSubstitutionRenderer,
29
30    /// Specifies how to generate clean and secure paths relative to the parsing
31    /// context.
32    pub path_resolver: PathResolver,
33}
34
35impl<'p> Parser<'p> {
36    /// Parse a UTF-8 string as an AsciiDoc document.
37    ///
38    /// Note that the document references the underlying source string and
39    /// necessarily has the same lifetime as the source.
40    ///
41    /// The [`Document`] data structure returned by this call and nearly all
42    /// data structures contained within it are gated by the lifetime of the
43    /// `source` text passed in to this function. For that reason all of
44    /// those data structures are given the lifetime `'src`.
45    ///
46    /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
47    /// encoding is allowed if a byte-order-mark (BOM) is present at the
48    /// start of a file. This format is not directly supported by the
49    /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
50    /// UTF-8 prior to parsing.
51    ///
52    /// **IMPORTANT:** The `Parser` struct will be updated with attributes and
53    /// similar values discovered during parsing.
54    ///
55    /// # Warnings, not errors
56    ///
57    /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
58    /// return an [`Option`] or [`Result`] data type. There may be any number of
59    /// character sequences that have ambiguous or potentially unintended
60    /// meanings. For that reason, a caller is advised to review the warnings
61    /// provided via the [`warnings()`] iterator.
62    ///
63    /// [`warnings()`]: Document::warnings
64    pub fn parse<'src>(&mut self, source: &'src str) -> Document<'src> {
65        Document::parse(source, self)
66    }
67
68    /// Retrieves the current interpreted value of a [document attribute].
69    ///
70    /// Each document holds a set of name-value pairs called document
71    /// attributes. These attributes provide a means of configuring the AsciiDoc
72    /// processor, declaring document metadata, and defining reusable content.
73    /// This page introduces document attributes and answers some questions
74    /// about the terminology used when referring to them.
75    ///
76    /// ## What are document attributes?
77    ///
78    /// Document attributes are effectively document-scoped variables for the
79    /// AsciiDoc language. The AsciiDoc language defines a set of built-in
80    /// attributes, and also allows the author (or extensions) to define
81    /// additional document attributes, which may replace built-in attributes
82    /// when permitted.
83    ///
84    /// Built-in attributes either provide access to read-only information about
85    /// the document and its environment or allow the author to configure
86    /// behavior of the AsciiDoc processor for a whole document or select
87    /// regions. Built-in attributes are effectively unordered. User-defined
88    /// attribute serve as a powerful text replacement tool. User-defined
89    /// attributes are stored in the order in which they are defined.
90    ///
91    /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
92    pub fn attribute_value<N: AsRef<str>>(&self, name: N) -> InterpretedValue {
93        self.attribute_values
94            .get(name.as_ref())
95            .map(|av| av.value.clone())
96            .map(|av| {
97                if let InterpretedValue::Set = av
98                    && let Some(default) = self.default_attribute_values.get(name.as_ref())
99                {
100                    InterpretedValue::Value(default.clone())
101                } else {
102                    av
103                }
104            })
105            .unwrap_or(InterpretedValue::Unset)
106    }
107
108    /// Returns `true` if the parser has a [document attribute] by this name.
109    ///
110    /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
111    pub fn has_attribute<N: AsRef<str>>(&self, name: N) -> bool {
112        self.attribute_values.contains_key(name.as_ref())
113    }
114
115    /// Returns `true` if the parser has a [document attribute] by this name
116    /// which has been set (i.e. is present and not [unset]).
117    ///
118    /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
119    /// [unset]: https://docs.asciidoctor.org/asciidoc/latest/attributes/unset-attributes/
120    pub fn is_attribute_set<N: AsRef<str>>(&self, name: N) -> bool {
121        self.attribute_values
122            .get(name.as_ref())
123            .map(|a| a.value != InterpretedValue::Unset)
124            .unwrap_or(false)
125    }
126
127    /// Sets the value of an [intrinsic attribute].
128    ///
129    /// Intrinsic attributes are set automatically by the processor. These
130    /// attributes provide information about the document being processed (e.g.,
131    /// `docfile`), the security mode under which the processor is running
132    /// (e.g., `safe-mode-name`), and information about the user’s environment
133    /// (e.g., `user-home`).
134    ///
135    /// The [`modification_context`](ModificationContext) establishes whether
136    /// the value can be subsequently modified by the document header and/or in
137    /// the document body.
138    ///
139    /// Subsequent calls to this function or [`with_intrinsic_attribute_bool()`]
140    /// are always permitted. The last such call for any given attribute name
141    /// takes precendence.
142    ///
143    /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
144    ///
145    /// [`with_intrinsic_attribute_bool()`]: Self::with_intrinsic_attribute_bool
146    pub fn with_intrinsic_attribute<N: AsRef<str>, V: AsRef<str>>(
147        mut self,
148        name: N,
149        value: V,
150        modification_context: ModificationContext,
151    ) -> Self {
152        let attribute_value = AttributeValue {
153            allowable_value: AllowableValue::Any,
154            modification_context,
155            value: InterpretedValue::Value(value.as_ref().to_string()),
156        };
157
158        self.attribute_values
159            .insert(name.as_ref().to_lowercase(), attribute_value);
160
161        self
162    }
163
164    /* Comment out until we're prepared to use and test this.
165        /// Sets the default value for an [intrinsic attribute].
166        ///
167        /// Default values for attributes are provided automatically by the
168        /// processor. These values provide a falllback textual value for an
169        /// attribute when it is merely "set" by the document via API, header, or
170        /// document body.
171        ///
172        /// Calling this does not imply that the value is set automatically by
173        /// default, nor does it establish any policy for where the value may be
174        /// modified. For that, please use [`with_intrinsic_attribute`].
175        ///
176        /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
177        /// [`with_intrinsic_attribute`]: Self::with_intrinsic_attribute
178        pub fn with_default_attribute_value<N: AsRef<str>, V: AsRef<str>>(
179            mut self,
180            name: N,
181            value: V,
182        ) -> Self {
183            self.default_attribute_values
184                .insert(name.as_ref().to_string(), value.as_ref().to_string());
185
186            self
187        }
188    */
189
190    /// Sets the value of an [intrinsic attribute] from a boolean flag.
191    ///
192    /// A boolean `true` is interpreted as "set." A boolean `false` is
193    /// interpreted as "unset."
194    ///
195    /// Intrinsic attributes are set automatically by the processor. These
196    /// attributes provide information about the document being processed (e.g.,
197    /// `docfile`), the security mode under which the processor is running
198    /// (e.g., `safe-mode-name`), and information about the user’s environment
199    /// (e.g., `user-home`).
200    ///
201    /// The [`modification_context`](ModificationContext) establishes whether
202    /// the value can be subsequently modified by the document header and/or in
203    /// the document body.
204    ///
205    /// Subsequent calls to this function or [`with_intrinsic_attribute()`] are
206    /// always permitted. The last such call for any given attribute name takes
207    /// precendence.
208    ///
209    /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
210    ///
211    /// [`with_intrinsic_attribute()`]: Self::with_intrinsic_attribute
212    pub fn with_intrinsic_attribute_bool<N: AsRef<str>>(
213        mut self,
214        name: N,
215        value: bool,
216        modification_context: ModificationContext,
217    ) -> Self {
218        let attribute_value = AttributeValue {
219            allowable_value: AllowableValue::Any,
220            modification_context,
221            value: if value {
222                InterpretedValue::Set
223            } else {
224                InterpretedValue::Unset
225            },
226        };
227
228        self.attribute_values
229            .insert(name.as_ref().to_lowercase(), attribute_value);
230
231        self
232    }
233
234    /// Called from [`Header::parse()`] to accept or reject an attribute value.
235    pub(crate) fn set_attribute_from_header<'src>(
236        &mut self,
237        attr: &Attribute<'src>,
238        warnings: &mut Vec<Warning<'src>>,
239    ) {
240        let attr_name = attr.name().data().to_lowercase();
241
242        let existing_attr = self.attribute_values.get(&attr_name);
243
244        // Verify that we have permission to overwrite any existing attribute value.
245        if let Some(existing_attr) = existing_attr
246            && existing_attr.modification_context == ModificationContext::ApiOnly
247        {
248            warnings.push(Warning {
249                source: attr.span(),
250                warning: WarningType::AttributeValueIsLocked(attr_name),
251            });
252            return;
253        }
254
255        let mut value = attr.value().clone();
256
257        if let InterpretedValue::Set = value
258            && let Some(default_value) = self.default_attribute_values.get(&attr_name)
259        {
260            value = InterpretedValue::Value(default_value.clone());
261        }
262
263        let attribute_value = AttributeValue {
264            allowable_value: AllowableValue::Any,
265            modification_context: ModificationContext::Anywhere,
266            value,
267        };
268
269        self.attribute_values.insert(attr_name, attribute_value);
270    }
271
272    /// Called from [`Header::parse()`] for a value that is derived from parsing
273    /// the header (except for attribute lines).
274    pub(crate) fn set_attribute_by_value_from_header<N: AsRef<str>, V: AsRef<str>>(
275        &mut self,
276        name: N,
277        value: V,
278    ) {
279        let attr_name = name.as_ref().to_lowercase();
280
281        let attribute_value = AttributeValue {
282            allowable_value: AllowableValue::Any,
283            modification_context: ModificationContext::Anywhere,
284            value: InterpretedValue::Value(value.as_ref().to_owned()),
285        };
286
287        self.attribute_values.insert(attr_name, attribute_value);
288    }
289
290    /// Called from [`Block::parse()`] to accept or reject an attribute value
291    /// from a document (body) attribute.
292    pub(crate) fn set_attribute_from_body<'src>(
293        &mut self,
294        attr: &Attribute<'src>,
295        warnings: &mut Vec<Warning<'src>>,
296    ) {
297        let attr_name = attr.name().data().to_lowercase();
298
299        // Verify that we have permission to overwrite any existing attribute value.
300        if let Some(existing_attr) = self.attribute_values.get(&attr_name)
301            && existing_attr.modification_context != ModificationContext::Anywhere
302        {
303            warnings.push(Warning {
304                source: attr.span(),
305                warning: WarningType::AttributeValueIsLocked(attr_name),
306            });
307            return;
308        }
309
310        let attribute_value = AttributeValue {
311            allowable_value: AllowableValue::Any,
312            modification_context: ModificationContext::Anywhere,
313            value: attr.value().clone(),
314        };
315
316        self.attribute_values.insert(attr_name, attribute_value);
317    }
318}
319
320const DEFAULT_RENDERER: &'static dyn InlineSubstitutionRenderer = &HtmlSubstitutionRenderer {};
321
322impl Default for Parser<'_> {
323    fn default() -> Self {
324        Self {
325            attribute_values: built_in_attrs(),
326            default_attribute_values: built_in_default_values(),
327            renderer: DEFAULT_RENDERER,
328            path_resolver: PathResolver::default(),
329        }
330    }
331}
332
333fn built_in_attrs() -> HashMap<String, AttributeValue> {
334    let mut attrs: HashMap<String, AttributeValue> = HashMap::new();
335
336    attrs.insert(
337        "empty".to_owned(),
338        AttributeValue {
339            allowable_value: AllowableValue::Any,
340            modification_context: ModificationContext::ApiOnly,
341            value: InterpretedValue::Value("".into()),
342        },
343    );
344
345    attrs.insert(
346        "sp".to_owned(),
347        AttributeValue {
348            allowable_value: AllowableValue::Any,
349            modification_context: ModificationContext::ApiOnly,
350            value: InterpretedValue::Value(" ".into()),
351        },
352    );
353
354    attrs.insert(
355        "deg".to_owned(),
356        AttributeValue {
357            allowable_value: AllowableValue::Any,
358            modification_context: ModificationContext::ApiOnly,
359            value: InterpretedValue::Value("&#176;".into()),
360        },
361    );
362
363    attrs.insert(
364        "plus".to_owned(),
365        AttributeValue {
366            allowable_value: AllowableValue::Any,
367            modification_context: ModificationContext::ApiOnly,
368            value: InterpretedValue::Value("&#43;".into()),
369        },
370    );
371
372    attrs.insert(
373        "toc".to_owned(),
374        AttributeValue {
375            allowable_value: AllowableValue::Any,
376            modification_context: ModificationContext::ApiOrHeader,
377            value: InterpretedValue::Unset,
378        },
379    );
380
381    attrs.insert(
382        "sectids".to_owned(),
383        AttributeValue {
384            allowable_value: AllowableValue::Empty,
385            modification_context: ModificationContext::Anywhere,
386            value: InterpretedValue::Set,
387        },
388    );
389
390    attrs.insert(
391        "example-caption".to_owned(),
392        AttributeValue {
393            allowable_value: AllowableValue::Any,
394            modification_context: ModificationContext::Anywhere,
395            value: InterpretedValue::Set,
396        },
397    );
398
399    // TO DO: Replace ./images with value of imagesdir if that is non-default.
400    attrs.insert(
401        "iconsdir".to_owned(),
402        AttributeValue {
403            allowable_value: AllowableValue::Any,
404            modification_context: ModificationContext::Anywhere,
405            value: InterpretedValue::Set,
406        },
407    );
408
409    attrs
410}
411
/// Builds the table of fallback text for built-in attributes.
///
/// An entry here supplies the textual value used when the named attribute is
/// merely "set" (present without an explicit value). Keys are lowercase
/// attribute names.
fn built_in_default_values() -> HashMap<String, String> {
    [
        ("example-caption", "Example"),
        ("iconsdir", "./images/icons"),
        ("sectnums", "all"),
        ("toc", "auto"),
    ]
    .into_iter()
    .map(|(name, value)| (name.to_owned(), value.to_owned()))
    .collect()
}