asciidoc_parser/parser/
parser.rs

1use std::{collections::HashMap, rc::Rc};
2
3use crate::{
4    Document, HasSpan,
5    document::{Attribute, InterpretedValue},
6    parser::{
7        AllowableValue, AttributeValue, HtmlSubstitutionRenderer, InlineSubstitutionRenderer,
8        ModificationContext, PathResolver,
9    },
10    warnings::{Warning, WarningType},
11};
12
/// The [`Parser`] struct and its related structs allow a caller to configure
/// how AsciiDoc parsing occurs and then to initiate the parsing process.
///
/// A parser carries state across a parse: document attribute values
/// discovered while parsing are recorded on the parser itself.
#[derive(Clone, Debug)]
pub struct Parser {
    /// Attribute values at current state of parsing.
    ///
    /// Keyed by attribute name. Every insertion performed in this file
    /// converts the name to lowercase before storing it.
    pub(crate) attribute_values: HashMap<String, AttributeValue>,

    /// Default values for attributes if "set."
    ///
    /// When an attribute is merely "set" (present, but with no explicit text
    /// value), the entry stored here under the same name, if any, supplies
    /// the text value that is reported for it.
    default_attribute_values: HashMap<String, String>,

    /// Specifies how the basic raw text of a simple block will be converted to
    /// the format which will ultimately be presented in the final output.
    ///
    /// Typically this is an [`HtmlSubstitutionRenderer`] but clients may
    /// provide alternative implementations.
    pub(crate) renderer: Rc<dyn InlineSubstitutionRenderer>,

    /// Specifies how to generate clean and secure paths relative to the parsing
    /// context.
    pub path_resolver: PathResolver,
}
34
35impl Parser {
    /// Parse a UTF-8 string as an AsciiDoc document.
    ///
    /// The [`Document`] data structure returned by this call has a `'static`
    /// lifetime; this is an implementation detail. It retains a copy of the
    /// `source` string that was passed in, but it is not tied to the lifetime
    /// of that string.
    ///
    /// Nearly all of the data structures contained within the [`Document`]
    /// structure are tied to the lifetime of the document and have a `'src`
    /// lifetime to signal their dependency on the source document.
    ///
    /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
    /// encoding is allowed if a byte-order-mark (BOM) is present at the
    /// start of a file. This format is not directly supported by the
    /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
    /// UTF-8 prior to parsing.
    ///
    /// The `Parser` struct will be updated with document attribute values
    /// discovered during parsing. These values may be inspected using
    /// [`attribute_value()`].
    ///
    /// # Warnings, not errors
    ///
    /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
    /// return an [`Option`] or [`Result`] data type. There may be any number of
    /// character sequences that have ambiguous or potentially unintended
    /// meanings. For that reason, a caller is advised to review the warnings
    /// provided via the [`warnings()`] iterator.
    ///
    /// [`warnings()`]: Document::warnings
    /// [`attribute_value()`]: Self::attribute_value
    pub fn parse(&mut self, source: &str) -> Document<'static> {
        // The mutable borrow of self ends when Document::parse returns,
        // ensuring no mutable reference to Parser escapes with the Document.
        // The Document is self-contained and owns its source string internally.
        Document::parse(source, self)
    }
73
74    /// Retrieves the current interpreted value of a [document attribute].
75    ///
76    /// Each document holds a set of name-value pairs called document
77    /// attributes. These attributes provide a means of configuring the AsciiDoc
78    /// processor, declaring document metadata, and defining reusable content.
79    /// This page introduces document attributes and answers some questions
80    /// about the terminology used when referring to them.
81    ///
82    /// ## What are document attributes?
83    ///
84    /// Document attributes are effectively document-scoped variables for the
85    /// AsciiDoc language. The AsciiDoc language defines a set of built-in
86    /// attributes, and also allows the author (or extensions) to define
87    /// additional document attributes, which may replace built-in attributes
88    /// when permitted.
89    ///
90    /// Built-in attributes either provide access to read-only information about
91    /// the document and its environment or allow the author to configure
92    /// behavior of the AsciiDoc processor for a whole document or select
93    /// regions. Built-in attributes are effectively unordered. User-defined
94    /// attribute serve as a powerful text replacement tool. User-defined
95    /// attributes are stored in the order in which they are defined.
96    ///
97    /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
98    pub fn attribute_value<N: AsRef<str>>(&self, name: N) -> InterpretedValue {
99        self.attribute_values
100            .get(name.as_ref())
101            .map(|av| av.value.clone())
102            .map(|av| {
103                if let InterpretedValue::Set = av
104                    && let Some(default) = self.default_attribute_values.get(name.as_ref())
105                {
106                    InterpretedValue::Value(default.clone())
107                } else {
108                    av
109                }
110            })
111            .unwrap_or(InterpretedValue::Unset)
112    }
113
114    /// Returns `true` if the parser has a [document attribute] by this name.
115    ///
116    /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
117    pub fn has_attribute<N: AsRef<str>>(&self, name: N) -> bool {
118        self.attribute_values.contains_key(name.as_ref())
119    }
120
121    /// Returns `true` if the parser has a [document attribute] by this name
122    /// which has been set (i.e. is present and not [unset]).
123    ///
124    /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
125    /// [unset]: https://docs.asciidoctor.org/asciidoc/latest/attributes/unset-attributes/
126    pub fn is_attribute_set<N: AsRef<str>>(&self, name: N) -> bool {
127        self.attribute_values
128            .get(name.as_ref())
129            .map(|a| a.value != InterpretedValue::Unset)
130            .unwrap_or(false)
131    }
132
133    /// Sets the value of an [intrinsic attribute].
134    ///
135    /// Intrinsic attributes are set automatically by the processor. These
136    /// attributes provide information about the document being processed (e.g.,
137    /// `docfile`), the security mode under which the processor is running
138    /// (e.g., `safe-mode-name`), and information about the user’s environment
139    /// (e.g., `user-home`).
140    ///
141    /// The [`modification_context`](ModificationContext) establishes whether
142    /// the value can be subsequently modified by the document header and/or in
143    /// the document body.
144    ///
145    /// Subsequent calls to this function or [`with_intrinsic_attribute_bool()`]
146    /// are always permitted. The last such call for any given attribute name
147    /// takes precendence.
148    ///
149    /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
150    ///
151    /// [`with_intrinsic_attribute_bool()`]: Self::with_intrinsic_attribute_bool
152    pub fn with_intrinsic_attribute<N: AsRef<str>, V: AsRef<str>>(
153        mut self,
154        name: N,
155        value: V,
156        modification_context: ModificationContext,
157    ) -> Self {
158        let attribute_value = AttributeValue {
159            allowable_value: AllowableValue::Any,
160            modification_context,
161            value: InterpretedValue::Value(value.as_ref().to_string()),
162        };
163
164        self.attribute_values
165            .insert(name.as_ref().to_lowercase(), attribute_value);
166
167        self
168    }
169
170    /* Comment out until we're prepared to use and test this.
171        /// Sets the default value for an [intrinsic attribute].
172        ///
173        /// Default values for attributes are provided automatically by the
        /// processor. These values provide a fallback textual value for an
175        /// attribute when it is merely "set" by the document via API, header, or
176        /// document body.
177        ///
178        /// Calling this does not imply that the value is set automatically by
179        /// default, nor does it establish any policy for where the value may be
180        /// modified. For that, please use [`with_intrinsic_attribute`].
181        ///
182        /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
183        /// [`with_intrinsic_attribute`]: Self::with_intrinsic_attribute
184        pub fn with_default_attribute_value<N: AsRef<str>, V: AsRef<str>>(
185            mut self,
186            name: N,
187            value: V,
188        ) -> Self {
189            self.default_attribute_values
190                .insert(name.as_ref().to_string(), value.as_ref().to_string());
191
192            self
193        }
194    */
195
196    /// Sets the value of an [intrinsic attribute] from a boolean flag.
197    ///
198    /// A boolean `true` is interpreted as "set." A boolean `false` is
199    /// interpreted as "unset."
200    ///
201    /// Intrinsic attributes are set automatically by the processor. These
202    /// attributes provide information about the document being processed (e.g.,
203    /// `docfile`), the security mode under which the processor is running
204    /// (e.g., `safe-mode-name`), and information about the user’s environment
205    /// (e.g., `user-home`).
206    ///
207    /// The [`modification_context`](ModificationContext) establishes whether
208    /// the value can be subsequently modified by the document header and/or in
209    /// the document body.
210    ///
211    /// Subsequent calls to this function or [`with_intrinsic_attribute()`] are
212    /// always permitted. The last such call for any given attribute name takes
213    /// precendence.
214    ///
215    /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
216    ///
217    /// [`with_intrinsic_attribute()`]: Self::with_intrinsic_attribute
218    pub fn with_intrinsic_attribute_bool<N: AsRef<str>>(
219        mut self,
220        name: N,
221        value: bool,
222        modification_context: ModificationContext,
223    ) -> Self {
224        let attribute_value = AttributeValue {
225            allowable_value: AllowableValue::Any,
226            modification_context,
227            value: if value {
228                InterpretedValue::Set
229            } else {
230                InterpretedValue::Unset
231            },
232        };
233
234        self.attribute_values
235            .insert(name.as_ref().to_lowercase(), attribute_value);
236
237        self
238    }
239
    /// Replace the default [`InlineSubstitutionRenderer`] for this parser.
    ///
    /// The default implementation of [`InlineSubstitutionRenderer`] that is
    /// provided is suitable for HTML5 rendering. If you are targeting a
    /// different back-end rendering, you will need to provide your own
    /// implementation and set it using this call before parsing.
    ///
    /// The parser takes ownership of `renderer` and returns itself so that
    /// configuration calls can be chained.
    pub fn with_inline_substitution_renderer<ISR: InlineSubstitutionRenderer + 'static>(
        mut self,
        renderer: ISR,
    ) -> Self {
        // The renderer is held as a reference-counted trait object.
        self.renderer = Rc::new(renderer);

        self
    }
254
255    /// Called from [`Header::parse()`] to accept or reject an attribute value.
256    pub(crate) fn set_attribute_from_header<'src>(
257        &mut self,
258        attr: &Attribute<'src>,
259        warnings: &mut Vec<Warning<'src>>,
260    ) {
261        let attr_name = attr.name().data().to_lowercase();
262
263        let existing_attr = self.attribute_values.get(&attr_name);
264
265        // Verify that we have permission to overwrite any existing attribute value.
266        if let Some(existing_attr) = existing_attr
267            && existing_attr.modification_context == ModificationContext::ApiOnly
268        {
269            warnings.push(Warning {
270                source: attr.span(),
271                warning: WarningType::AttributeValueIsLocked(attr_name),
272            });
273            return;
274        }
275
276        let mut value = attr.value().clone();
277
278        if let InterpretedValue::Set = value
279            && let Some(default_value) = self.default_attribute_values.get(&attr_name)
280        {
281            value = InterpretedValue::Value(default_value.clone());
282        }
283
284        let attribute_value = AttributeValue {
285            allowable_value: AllowableValue::Any,
286            modification_context: ModificationContext::Anywhere,
287            value,
288        };
289
290        self.attribute_values.insert(attr_name, attribute_value);
291    }
292
293    /// Called from [`Header::parse()`] for a value that is derived from parsing
294    /// the header (except for attribute lines).
295    pub(crate) fn set_attribute_by_value_from_header<N: AsRef<str>, V: AsRef<str>>(
296        &mut self,
297        name: N,
298        value: V,
299    ) {
300        let attr_name = name.as_ref().to_lowercase();
301
302        let attribute_value = AttributeValue {
303            allowable_value: AllowableValue::Any,
304            modification_context: ModificationContext::Anywhere,
305            value: InterpretedValue::Value(value.as_ref().to_owned()),
306        };
307
308        self.attribute_values.insert(attr_name, attribute_value);
309    }
310
311    /// Called from [`Block::parse()`] to accept or reject an attribute value
312    /// from a document (body) attribute.
313    pub(crate) fn set_attribute_from_body<'src>(
314        &mut self,
315        attr: &Attribute<'src>,
316        warnings: &mut Vec<Warning<'src>>,
317    ) {
318        let attr_name = attr.name().data().to_lowercase();
319
320        // Verify that we have permission to overwrite any existing attribute value.
321        if let Some(existing_attr) = self.attribute_values.get(&attr_name)
322            && existing_attr.modification_context != ModificationContext::Anywhere
323        {
324            warnings.push(Warning {
325                source: attr.span(),
326                warning: WarningType::AttributeValueIsLocked(attr_name),
327            });
328            return;
329        }
330
331        let attribute_value = AttributeValue {
332            allowable_value: AllowableValue::Any,
333            modification_context: ModificationContext::Anywhere,
334            value: attr.value().clone(),
335        };
336
337        self.attribute_values.insert(attr_name, attribute_value);
338    }
339}
340
impl Default for Parser {
    /// Creates a parser populated with the built-in attributes and built-in
    /// default attribute values, an [`HtmlSubstitutionRenderer`] for inline
    /// substitutions, and the default [`PathResolver`].
    fn default() -> Self {
        Self {
            attribute_values: built_in_attrs(),
            default_attribute_values: built_in_default_values(),
            renderer: Rc::new(HtmlSubstitutionRenderer {}),
            path_resolver: PathResolver::default(),
        }
    }
}
351
352fn built_in_attrs() -> HashMap<String, AttributeValue> {
353    let mut attrs: HashMap<String, AttributeValue> = HashMap::new();
354
355    attrs.insert(
356        "empty".to_owned(),
357        AttributeValue {
358            allowable_value: AllowableValue::Any,
359            modification_context: ModificationContext::ApiOnly,
360            value: InterpretedValue::Value("".into()),
361        },
362    );
363
364    attrs.insert(
365        "sp".to_owned(),
366        AttributeValue {
367            allowable_value: AllowableValue::Any,
368            modification_context: ModificationContext::ApiOnly,
369            value: InterpretedValue::Value(" ".into()),
370        },
371    );
372
373    attrs.insert(
374        "deg".to_owned(),
375        AttributeValue {
376            allowable_value: AllowableValue::Any,
377            modification_context: ModificationContext::ApiOnly,
378            value: InterpretedValue::Value("&#176;".into()),
379        },
380    );
381
382    attrs.insert(
383        "plus".to_owned(),
384        AttributeValue {
385            allowable_value: AllowableValue::Any,
386            modification_context: ModificationContext::ApiOnly,
387            value: InterpretedValue::Value("&#43;".into()),
388        },
389    );
390
391    attrs.insert(
392        "toc".to_owned(),
393        AttributeValue {
394            allowable_value: AllowableValue::Any,
395            modification_context: ModificationContext::ApiOrHeader,
396            value: InterpretedValue::Unset,
397        },
398    );
399
400    attrs.insert(
401        "sectids".to_owned(),
402        AttributeValue {
403            allowable_value: AllowableValue::Empty,
404            modification_context: ModificationContext::Anywhere,
405            value: InterpretedValue::Set,
406        },
407    );
408
409    attrs.insert(
410        "example-caption".to_owned(),
411        AttributeValue {
412            allowable_value: AllowableValue::Any,
413            modification_context: ModificationContext::Anywhere,
414            value: InterpretedValue::Set,
415        },
416    );
417
418    // TO DO: Replace ./images with value of imagesdir if that is non-default.
419    attrs.insert(
420        "iconsdir".to_owned(),
421        AttributeValue {
422            allowable_value: AllowableValue::Any,
423            modification_context: ModificationContext::Anywhere,
424            value: InterpretedValue::Set,
425        },
426    );
427
428    attrs
429}
430
/// Builds the table of default text values, keyed by attribute name.
///
/// An entry here is the text reported for an attribute that is merely "set"
/// without an explicit value.
fn built_in_default_values() -> HashMap<String, String> {
    [
        ("example-caption", "Example"),
        ("iconsdir", "./images/icons"),
        ("sectnums", "all"),
        ("toc", "auto"),
    ]
    .into_iter()
    .map(|(name, text)| (name.to_owned(), text.to_owned()))
    .collect()
}