asciidoc_parser/parser/
parser.rs

1use std::{collections::HashMap, rc::Rc};
2
3use crate::{
4    Document, HasSpan,
5    document::{Attribute, InterpretedValue},
6    parser::{
7        AllowableValue, AttributeValue, HtmlSubstitutionRenderer, IncludeFileHandler,
8        InlineSubstitutionRenderer, ModificationContext, PathResolver, preprocessor::preprocess,
9    },
10    warnings::{Warning, WarningType},
11};
12
13/// The [`Parser`] struct and its related structs allow a caller to configure
14/// how AsciiDoc parsing occurs and then to initiate the parsing process.
15#[derive(Clone, Debug)]
16pub struct Parser {
17    /// Attribute values at current state of parsing.
18    pub(crate) attribute_values: HashMap<String, AttributeValue>,
19
20    /// Default values for attributes if "set."
21    default_attribute_values: HashMap<String, String>,
22
23    /// Specifies how the basic raw text of a simple block will be converted to
24    /// the format which will ultimately be presented in the final output.
25    ///
26    /// Typically this is an [`HtmlSubstitutionRenderer`] but clients may
27    /// provide alternative implementations.
28    pub(crate) renderer: Rc<dyn InlineSubstitutionRenderer>,
29
30    /// Specifies the name of the primary file to be parsed.
31    pub(crate) primary_file_name: Option<String>,
32
33    /// Specifies how to generate clean and secure paths relative to the parsing
34    /// context.
35    pub path_resolver: PathResolver,
36
37    /// Handler for resolving include:: directives.
38    pub(crate) include_file_handler: Option<Rc<dyn IncludeFileHandler>>,
39}
40
41impl Default for Parser {
42    fn default() -> Self {
43        Self {
44            attribute_values: built_in_attrs(),
45            default_attribute_values: built_in_default_values(),
46            renderer: Rc::new(HtmlSubstitutionRenderer {}),
47            primary_file_name: None,
48            path_resolver: PathResolver::default(),
49            include_file_handler: None,
50        }
51    }
52}
53
54impl Parser {
55    /// Parse a UTF-8 string as an AsciiDoc document.
56    ///
57    /// The [`Document`] data structure returned by this call has a '`static`
58    /// lifetime; this is an implementation detail. It retains a copy of the
59    /// `source` string that was passed in, but it is not tied to the lifetime
60    /// of that string.
61    ///
62    /// Nearly all of the data structures contained within the [`Document`]
63    /// structure are tied to the lifetime of the document and have a `'src`
64    /// lifetime to signal their dependency on the source document.
65    ///
66    /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
67    /// encoding is allowed if a byte-order-mark (BOM) is present at the
68    /// start of a file. This format is not directly supported by the
69    /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
70    /// UTF-8 prior to parsing.
71    ///
72    /// The `Parser` struct will be updated with document attribute values
73    /// discovered during parsing. These values may be inspected using
74    /// [`attribute_value()`].
75    ///
76    /// # Warnings, not errors
77    ///
78    /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
79    /// return an [`Option`] or [`Result`] data type. There may be any number of
80    /// character sequences that have ambiguous or potentially unintended
81    /// meanings. For that reason, a caller is advised to review the warnings
82    /// provided via the [`warnings()`] iterator.
83    ///
84    /// [`warnings()`]: Document::warnings
85    /// [`attribute_value()`]: Self::attribute_value
86    pub fn parse(&mut self, source: &str) -> Document<'static> {
87        let (preprocessed_source, source_map) = preprocess(source, self);
88        Document::parse(&preprocessed_source, source_map, self)
89    }
90
91    /// Retrieves the current interpreted value of a [document attribute].
92    ///
93    /// Each document holds a set of name-value pairs called document
94    /// attributes. These attributes provide a means of configuring the AsciiDoc
95    /// processor, declaring document metadata, and defining reusable content.
96    /// This page introduces document attributes and answers some questions
97    /// about the terminology used when referring to them.
98    ///
99    /// ## What are document attributes?
100    ///
101    /// Document attributes are effectively document-scoped variables for the
102    /// AsciiDoc language. The AsciiDoc language defines a set of built-in
103    /// attributes, and also allows the author (or extensions) to define
104    /// additional document attributes, which may replace built-in attributes
105    /// when permitted.
106    ///
107    /// Built-in attributes either provide access to read-only information about
108    /// the document and its environment or allow the author to configure
109    /// behavior of the AsciiDoc processor for a whole document or select
110    /// regions. Built-in attributes are effectively unordered. User-defined
111    /// attribute serve as a powerful text replacement tool. User-defined
112    /// attributes are stored in the order in which they are defined.
113    ///
114    /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
115    pub fn attribute_value<N: AsRef<str>>(&self, name: N) -> InterpretedValue {
116        self.attribute_values
117            .get(name.as_ref())
118            .map(|av| av.value.clone())
119            .map(|av| {
120                if let InterpretedValue::Set = av
121                    && let Some(default) = self.default_attribute_values.get(name.as_ref())
122                {
123                    InterpretedValue::Value(default.clone())
124                } else {
125                    av
126                }
127            })
128            .unwrap_or(InterpretedValue::Unset)
129    }
130
131    /// Returns `true` if the parser has a [document attribute] by this name.
132    ///
133    /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
134    pub fn has_attribute<N: AsRef<str>>(&self, name: N) -> bool {
135        self.attribute_values.contains_key(name.as_ref())
136    }
137
138    /// Returns `true` if the parser has a [document attribute] by this name
139    /// which has been set (i.e. is present and not [unset]).
140    ///
141    /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
142    /// [unset]: https://docs.asciidoctor.org/asciidoc/latest/attributes/unset-attributes/
143    pub fn is_attribute_set<N: AsRef<str>>(&self, name: N) -> bool {
144        self.attribute_values
145            .get(name.as_ref())
146            .map(|a| a.value != InterpretedValue::Unset)
147            .unwrap_or(false)
148    }
149
150    /// Sets the value of an [intrinsic attribute].
151    ///
152    /// Intrinsic attributes are set automatically by the processor. These
153    /// attributes provide information about the document being processed (e.g.,
154    /// `docfile`), the security mode under which the processor is running
155    /// (e.g., `safe-mode-name`), and information about the user’s environment
156    /// (e.g., `user-home`).
157    ///
158    /// The [`modification_context`](ModificationContext) establishes whether
159    /// the value can be subsequently modified by the document header and/or in
160    /// the document body.
161    ///
162    /// Subsequent calls to this function or [`with_intrinsic_attribute_bool()`]
163    /// are always permitted. The last such call for any given attribute name
164    /// takes precendence.
165    ///
166    /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
167    ///
168    /// [`with_intrinsic_attribute_bool()`]: Self::with_intrinsic_attribute_bool
169    pub fn with_intrinsic_attribute<N: AsRef<str>, V: AsRef<str>>(
170        mut self,
171        name: N,
172        value: V,
173        modification_context: ModificationContext,
174    ) -> Self {
175        let attribute_value = AttributeValue {
176            allowable_value: AllowableValue::Any,
177            modification_context,
178            value: InterpretedValue::Value(value.as_ref().to_string()),
179        };
180
181        self.attribute_values
182            .insert(name.as_ref().to_lowercase(), attribute_value);
183
184        self
185    }
186
    /* Comment out until we're prepared to use and test this.
        /// Sets the default value for an [intrinsic attribute].
        ///
        /// Default values for attributes are provided automatically by the
        /// processor. These values provide a fallback textual value for an
        /// attribute when it is merely "set" by the document via API, header, or
        /// document body.
        ///
        /// Calling this does not imply that the value is set automatically by
        /// default, nor does it establish any policy for where the value may be
        /// modified. For that, please use [`with_intrinsic_attribute`].
        ///
        /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
        /// [`with_intrinsic_attribute`]: Self::with_intrinsic_attribute
        pub fn with_default_attribute_value<N: AsRef<str>, V: AsRef<str>>(
            mut self,
            name: N,
            value: V,
        ) -> Self {
            self.default_attribute_values
                .insert(name.as_ref().to_string(), value.as_ref().to_string());

            self
        }
    */
212
213    /// Sets the value of an [intrinsic attribute] from a boolean flag.
214    ///
215    /// A boolean `true` is interpreted as "set." A boolean `false` is
216    /// interpreted as "unset."
217    ///
218    /// Intrinsic attributes are set automatically by the processor. These
219    /// attributes provide information about the document being processed (e.g.,
220    /// `docfile`), the security mode under which the processor is running
221    /// (e.g., `safe-mode-name`), and information about the user’s environment
222    /// (e.g., `user-home`).
223    ///
224    /// The [`modification_context`](ModificationContext) establishes whether
225    /// the value can be subsequently modified by the document header and/or in
226    /// the document body.
227    ///
228    /// Subsequent calls to this function or [`with_intrinsic_attribute()`] are
229    /// always permitted. The last such call for any given attribute name takes
230    /// precendence.
231    ///
232    /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
233    ///
234    /// [`with_intrinsic_attribute()`]: Self::with_intrinsic_attribute
235    pub fn with_intrinsic_attribute_bool<N: AsRef<str>>(
236        mut self,
237        name: N,
238        value: bool,
239        modification_context: ModificationContext,
240    ) -> Self {
241        let attribute_value = AttributeValue {
242            allowable_value: AllowableValue::Any,
243            modification_context,
244            value: if value {
245                InterpretedValue::Set
246            } else {
247                InterpretedValue::Unset
248            },
249        };
250
251        self.attribute_values
252            .insert(name.as_ref().to_lowercase(), attribute_value);
253
254        self
255    }
256
257    /// Replace the default [`InlineSubstitutionRenderer`] for this parser.
258    ///
259    /// The default implementation of [`InlineSubstitutionRenderer`] that is
260    /// provided is suitable for HTML5 rendering. If you are targeting a
261    /// different back-end rendering, you will need to provide your own
262    /// implementation and set it using this call before parsing.
263    pub fn with_inline_substitution_renderer<ISR: InlineSubstitutionRenderer + 'static>(
264        mut self,
265        renderer: ISR,
266    ) -> Self {
267        self.renderer = Rc::new(renderer);
268        self
269    }
270
271    /// Sets the name of the primary file to be parsed when [`parse()`] is
272    /// called.
273    ///
274    /// This name will be used for any error messages detected in this file and
275    /// also will be passed to [`IncludeFileHandler::resolve_target()`] as the
276    /// `source` argument for any `include::` file resolution requests from this
277    /// file.
278    ///
279    /// [`parse()`]: Self::parse
280    /// [`IncludeFileHandler::resolve_target()`]: crate::parser::IncludeFileHandler::resolve_target
281    pub fn with_primary_file_name<S: AsRef<str>>(mut self, name: S) -> Self {
282        self.primary_file_name = Some(name.as_ref().to_owned());
283        self
284    }
285
286    /// Sets the [`IncludeFileHandler`] for this parser.
287    ///
288    /// The include file handler is responsible for resolving `include::`
289    /// directives encountered during preprocessing. If no handler is provided,
290    /// include directives will be ignored.
291    ///
292    /// [`IncludeFileHandler`]: crate::parser::IncludeFileHandler
293    pub fn with_include_file_handler<IFH: IncludeFileHandler + 'static>(
294        mut self,
295        handler: IFH,
296    ) -> Self {
297        self.include_file_handler = Some(Rc::new(handler));
298        self
299    }
300
301    /// Called from [`Header::parse()`] to accept or reject an attribute value.
302    pub(crate) fn set_attribute_from_header<'src>(
303        &mut self,
304        attr: &Attribute<'src>,
305        warnings: &mut Vec<Warning<'src>>,
306    ) {
307        let attr_name = attr.name().data().to_lowercase();
308
309        let existing_attr = self.attribute_values.get(&attr_name);
310
311        // Verify that we have permission to overwrite any existing attribute value.
312        if let Some(existing_attr) = existing_attr
313            && existing_attr.modification_context == ModificationContext::ApiOnly
314        {
315            warnings.push(Warning {
316                source: attr.span(),
317                warning: WarningType::AttributeValueIsLocked(attr_name),
318            });
319            return;
320        }
321
322        let mut value = attr.value().clone();
323
324        if let InterpretedValue::Set = value
325            && let Some(default_value) = self.default_attribute_values.get(&attr_name)
326        {
327            value = InterpretedValue::Value(default_value.clone());
328        }
329
330        let attribute_value = AttributeValue {
331            allowable_value: AllowableValue::Any,
332            modification_context: ModificationContext::Anywhere,
333            value,
334        };
335
336        self.attribute_values.insert(attr_name, attribute_value);
337    }
338
339    /// Called from [`Header::parse()`] for a value that is derived from parsing
340    /// the header (except for attribute lines).
341    pub(crate) fn set_attribute_by_value_from_header<N: AsRef<str>, V: AsRef<str>>(
342        &mut self,
343        name: N,
344        value: V,
345    ) {
346        let attr_name = name.as_ref().to_lowercase();
347
348        let attribute_value = AttributeValue {
349            allowable_value: AllowableValue::Any,
350            modification_context: ModificationContext::Anywhere,
351            value: InterpretedValue::Value(value.as_ref().to_owned()),
352        };
353
354        self.attribute_values.insert(attr_name, attribute_value);
355    }
356
357    /// Called from [`Block::parse()`] to accept or reject an attribute value
358    /// from a document (body) attribute.
359    pub(crate) fn set_attribute_from_body<'src>(
360        &mut self,
361        attr: &Attribute<'src>,
362        warnings: &mut Vec<Warning<'src>>,
363    ) {
364        let attr_name = attr.name().data().to_lowercase();
365
366        // Verify that we have permission to overwrite any existing attribute value.
367        if let Some(existing_attr) = self.attribute_values.get(&attr_name)
368            && existing_attr.modification_context != ModificationContext::Anywhere
369        {
370            warnings.push(Warning {
371                source: attr.span(),
372                warning: WarningType::AttributeValueIsLocked(attr_name),
373            });
374            return;
375        }
376
377        let attribute_value = AttributeValue {
378            allowable_value: AllowableValue::Any,
379            modification_context: ModificationContext::Anywhere,
380            value: attr.value().clone(),
381        };
382
383        self.attribute_values.insert(attr_name, attribute_value);
384    }
385}
386
387fn built_in_attrs() -> HashMap<String, AttributeValue> {
388    let mut attrs: HashMap<String, AttributeValue> = HashMap::new();
389
390    attrs.insert(
391        "empty".to_owned(),
392        AttributeValue {
393            allowable_value: AllowableValue::Any,
394            modification_context: ModificationContext::ApiOnly,
395            value: InterpretedValue::Value("".into()),
396        },
397    );
398
399    attrs.insert(
400        "sp".to_owned(),
401        AttributeValue {
402            allowable_value: AllowableValue::Any,
403            modification_context: ModificationContext::ApiOnly,
404            value: InterpretedValue::Value(" ".into()),
405        },
406    );
407
408    attrs.insert(
409        "deg".to_owned(),
410        AttributeValue {
411            allowable_value: AllowableValue::Any,
412            modification_context: ModificationContext::ApiOnly,
413            value: InterpretedValue::Value("&#176;".into()),
414        },
415    );
416
417    attrs.insert(
418        "plus".to_owned(),
419        AttributeValue {
420            allowable_value: AllowableValue::Any,
421            modification_context: ModificationContext::ApiOnly,
422            value: InterpretedValue::Value("&#43;".into()),
423        },
424    );
425
426    attrs.insert(
427        "toc".to_owned(),
428        AttributeValue {
429            allowable_value: AllowableValue::Any,
430            modification_context: ModificationContext::ApiOrHeader,
431            value: InterpretedValue::Unset,
432        },
433    );
434
435    attrs.insert(
436        "sectids".to_owned(),
437        AttributeValue {
438            allowable_value: AllowableValue::Empty,
439            modification_context: ModificationContext::Anywhere,
440            value: InterpretedValue::Set,
441        },
442    );
443
444    attrs.insert(
445        "example-caption".to_owned(),
446        AttributeValue {
447            allowable_value: AllowableValue::Any,
448            modification_context: ModificationContext::Anywhere,
449            value: InterpretedValue::Set,
450        },
451    );
452
453    // TO DO: Replace ./images with value of imagesdir if that is non-default.
454    attrs.insert(
455        "iconsdir".to_owned(),
456        AttributeValue {
457            allowable_value: AllowableValue::Any,
458            modification_context: ModificationContext::Anywhere,
459            value: InterpretedValue::Set,
460        },
461    );
462
463    attrs
464}
465
/// Builds the table of textual fallback values, keyed by attribute name,
/// that stand in for an attribute which is "set" without an explicit value.
fn built_in_default_values() -> HashMap<String, String> {
    const DEFAULTS: [(&str, &str); 4] = [
        ("example-caption", "Example"),
        ("iconsdir", "./images/icons"),
        ("sectnums", "all"),
        ("toc", "auto"),
    ];

    DEFAULTS
        .iter()
        .map(|&(name, value)| (name.to_owned(), value.to_owned()))
        .collect()
}