// asciidoc_parser/parser/parser.rs
use std::{collections::HashMap, rc::Rc};

use crate::{
    Document, HasSpan,
    document::{Attribute, InterpretedValue},
    parser::{
        AllowableValue, AttributeValue, HtmlSubstitutionRenderer, InlineSubstitutionRenderer,
        ModificationContext, PathResolver,
    },
    warnings::{Warning, WarningType},
};

13/// The [`Parser`] struct and its related structs allow a caller to configure
14/// how AsciiDoc parsing occurs and then to initiate the parsing process.
15#[derive(Clone, Debug)]
16pub struct Parser {
17 /// Attribute values at current state of parsing.
18 pub(crate) attribute_values: HashMap<String, AttributeValue>,
19
20 /// Default values for attributes if "set."
21 default_attribute_values: HashMap<String, String>,
22
23 /// Specifies how the basic raw text of a simple block will be converted to
24 /// the format which will ultimately be presented in the final output.
25 ///
26 /// Typically this is an [`HtmlSubstitutionRenderer`] but clients may
27 /// provide alternative implementations.
28 pub(crate) renderer: Rc<dyn InlineSubstitutionRenderer>,
29
30 /// Specifies how to generate clean and secure paths relative to the parsing
31 /// context.
32 pub path_resolver: PathResolver,
33}
34
35impl Parser {
36 /// Parse a UTF-8 string as an AsciiDoc document.
37 ///
38 /// The [`Document`] data structure returned by this call has a '`static`
39 /// lifetime; this is an implementation detail. It retains a copy of the
40 /// `source` string that was passed in, but it is not tied to the lifetime
41 /// of that string.
42 ///
43 /// Nearly all of the data structures contained within the [`Document`]
44 /// structure are tied to the lifetime of the document and have a `'src`
45 /// lifetime to signal their dependency on the source document.
46 ///
47 /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
48 /// encoding is allowed if a byte-order-mark (BOM) is present at the
49 /// start of a file. This format is not directly supported by the
50 /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
51 /// UTF-8 prior to parsing.
52 ///
53 /// The `Parser` struct will be updated with document attribute values
54 /// discovered during parsing. These values may be inspected using
55 /// [`attribute_value()`].
56 ///
57 /// # Warnings, not errors
58 ///
59 /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
60 /// return an [`Option`] or [`Result`] data type. There may be any number of
61 /// character sequences that have ambiguous or potentially unintended
62 /// meanings. For that reason, a caller is advised to review the warnings
63 /// provided via the [`warnings()`] iterator.
64 ///
65 /// [`warnings()`]: Document::warnings
66 /// [`attribute_value()`]: Self::attribute_value
67 pub fn parse(&mut self, source: &str) -> Document<'static> {
68 // The mutable borrow of self ends when Document::parse returns,
69 // ensuring no mutable reference to Parser escapes with the Document.
70 // The Document is self-contained and owns its source string internally.
71 Document::parse(source, self)
72 }
73
74 /// Retrieves the current interpreted value of a [document attribute].
75 ///
76 /// Each document holds a set of name-value pairs called document
77 /// attributes. These attributes provide a means of configuring the AsciiDoc
78 /// processor, declaring document metadata, and defining reusable content.
79 /// This page introduces document attributes and answers some questions
80 /// about the terminology used when referring to them.
81 ///
82 /// ## What are document attributes?
83 ///
84 /// Document attributes are effectively document-scoped variables for the
85 /// AsciiDoc language. The AsciiDoc language defines a set of built-in
86 /// attributes, and also allows the author (or extensions) to define
87 /// additional document attributes, which may replace built-in attributes
88 /// when permitted.
89 ///
90 /// Built-in attributes either provide access to read-only information about
91 /// the document and its environment or allow the author to configure
92 /// behavior of the AsciiDoc processor for a whole document or select
93 /// regions. Built-in attributes are effectively unordered. User-defined
94 /// attribute serve as a powerful text replacement tool. User-defined
95 /// attributes are stored in the order in which they are defined.
96 ///
97 /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
98 pub fn attribute_value<N: AsRef<str>>(&self, name: N) -> InterpretedValue {
99 self.attribute_values
100 .get(name.as_ref())
101 .map(|av| av.value.clone())
102 .map(|av| {
103 if let InterpretedValue::Set = av
104 && let Some(default) = self.default_attribute_values.get(name.as_ref())
105 {
106 InterpretedValue::Value(default.clone())
107 } else {
108 av
109 }
110 })
111 .unwrap_or(InterpretedValue::Unset)
112 }
113
114 /// Returns `true` if the parser has a [document attribute] by this name.
115 ///
116 /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
117 pub fn has_attribute<N: AsRef<str>>(&self, name: N) -> bool {
118 self.attribute_values.contains_key(name.as_ref())
119 }
120
121 /// Returns `true` if the parser has a [document attribute] by this name
122 /// which has been set (i.e. is present and not [unset]).
123 ///
124 /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
125 /// [unset]: https://docs.asciidoctor.org/asciidoc/latest/attributes/unset-attributes/
126 pub fn is_attribute_set<N: AsRef<str>>(&self, name: N) -> bool {
127 self.attribute_values
128 .get(name.as_ref())
129 .map(|a| a.value != InterpretedValue::Unset)
130 .unwrap_or(false)
131 }
132
133 /// Sets the value of an [intrinsic attribute].
134 ///
135 /// Intrinsic attributes are set automatically by the processor. These
136 /// attributes provide information about the document being processed (e.g.,
137 /// `docfile`), the security mode under which the processor is running
138 /// (e.g., `safe-mode-name`), and information about the user’s environment
139 /// (e.g., `user-home`).
140 ///
141 /// The [`modification_context`](ModificationContext) establishes whether
142 /// the value can be subsequently modified by the document header and/or in
143 /// the document body.
144 ///
145 /// Subsequent calls to this function or [`with_intrinsic_attribute_bool()`]
146 /// are always permitted. The last such call for any given attribute name
147 /// takes precendence.
148 ///
149 /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
150 ///
151 /// [`with_intrinsic_attribute_bool()`]: Self::with_intrinsic_attribute_bool
152 pub fn with_intrinsic_attribute<N: AsRef<str>, V: AsRef<str>>(
153 mut self,
154 name: N,
155 value: V,
156 modification_context: ModificationContext,
157 ) -> Self {
158 let attribute_value = AttributeValue {
159 allowable_value: AllowableValue::Any,
160 modification_context,
161 value: InterpretedValue::Value(value.as_ref().to_string()),
162 };
163
164 self.attribute_values
165 .insert(name.as_ref().to_lowercase(), attribute_value);
166
167 self
168 }
169
170 /* Comment out until we're prepared to use and test this.
171 /// Sets the default value for an [intrinsic attribute].
172 ///
173 /// Default values for attributes are provided automatically by the
174 /// processor. These values provide a falllback textual value for an
175 /// attribute when it is merely "set" by the document via API, header, or
176 /// document body.
177 ///
178 /// Calling this does not imply that the value is set automatically by
179 /// default, nor does it establish any policy for where the value may be
180 /// modified. For that, please use [`with_intrinsic_attribute`].
181 ///
182 /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
183 /// [`with_intrinsic_attribute`]: Self::with_intrinsic_attribute
184 pub fn with_default_attribute_value<N: AsRef<str>, V: AsRef<str>>(
185 mut self,
186 name: N,
187 value: V,
188 ) -> Self {
189 self.default_attribute_values
190 .insert(name.as_ref().to_string(), value.as_ref().to_string());
191
192 self
193 }
194 */
195
196 /// Sets the value of an [intrinsic attribute] from a boolean flag.
197 ///
198 /// A boolean `true` is interpreted as "set." A boolean `false` is
199 /// interpreted as "unset."
200 ///
201 /// Intrinsic attributes are set automatically by the processor. These
202 /// attributes provide information about the document being processed (e.g.,
203 /// `docfile`), the security mode under which the processor is running
204 /// (e.g., `safe-mode-name`), and information about the user’s environment
205 /// (e.g., `user-home`).
206 ///
207 /// The [`modification_context`](ModificationContext) establishes whether
208 /// the value can be subsequently modified by the document header and/or in
209 /// the document body.
210 ///
211 /// Subsequent calls to this function or [`with_intrinsic_attribute()`] are
212 /// always permitted. The last such call for any given attribute name takes
213 /// precendence.
214 ///
215 /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
216 ///
217 /// [`with_intrinsic_attribute()`]: Self::with_intrinsic_attribute
218 pub fn with_intrinsic_attribute_bool<N: AsRef<str>>(
219 mut self,
220 name: N,
221 value: bool,
222 modification_context: ModificationContext,
223 ) -> Self {
224 let attribute_value = AttributeValue {
225 allowable_value: AllowableValue::Any,
226 modification_context,
227 value: if value {
228 InterpretedValue::Set
229 } else {
230 InterpretedValue::Unset
231 },
232 };
233
234 self.attribute_values
235 .insert(name.as_ref().to_lowercase(), attribute_value);
236
237 self
238 }
239
240 /// Replace the default [`InlineSubstitutionRenderer`] for this parser.
241 ///
242 /// The default implementation of [`InlineSubstitutionRenderer`] that is
243 /// provided is suitable for HTML5 rendering. If you are targeting a
244 /// different back-end rendering, you will need to provide your own
245 /// implementation and set it using this call before parsing.
246 pub fn with_inline_substitution_renderer<ISR: InlineSubstitutionRenderer + 'static>(
247 mut self,
248 renderer: ISR,
249 ) -> Self {
250 self.renderer = Rc::new(renderer);
251
252 self
253 }
254
255 /// Called from [`Header::parse()`] to accept or reject an attribute value.
256 pub(crate) fn set_attribute_from_header<'src>(
257 &mut self,
258 attr: &Attribute<'src>,
259 warnings: &mut Vec<Warning<'src>>,
260 ) {
261 let attr_name = attr.name().data().to_lowercase();
262
263 let existing_attr = self.attribute_values.get(&attr_name);
264
265 // Verify that we have permission to overwrite any existing attribute value.
266 if let Some(existing_attr) = existing_attr
267 && existing_attr.modification_context == ModificationContext::ApiOnly
268 {
269 warnings.push(Warning {
270 source: attr.span(),
271 warning: WarningType::AttributeValueIsLocked(attr_name),
272 });
273 return;
274 }
275
276 let mut value = attr.value().clone();
277
278 if let InterpretedValue::Set = value
279 && let Some(default_value) = self.default_attribute_values.get(&attr_name)
280 {
281 value = InterpretedValue::Value(default_value.clone());
282 }
283
284 let attribute_value = AttributeValue {
285 allowable_value: AllowableValue::Any,
286 modification_context: ModificationContext::Anywhere,
287 value,
288 };
289
290 self.attribute_values.insert(attr_name, attribute_value);
291 }
292
293 /// Called from [`Header::parse()`] for a value that is derived from parsing
294 /// the header (except for attribute lines).
295 pub(crate) fn set_attribute_by_value_from_header<N: AsRef<str>, V: AsRef<str>>(
296 &mut self,
297 name: N,
298 value: V,
299 ) {
300 let attr_name = name.as_ref().to_lowercase();
301
302 let attribute_value = AttributeValue {
303 allowable_value: AllowableValue::Any,
304 modification_context: ModificationContext::Anywhere,
305 value: InterpretedValue::Value(value.as_ref().to_owned()),
306 };
307
308 self.attribute_values.insert(attr_name, attribute_value);
309 }
310
311 /// Called from [`Block::parse()`] to accept or reject an attribute value
312 /// from a document (body) attribute.
313 pub(crate) fn set_attribute_from_body<'src>(
314 &mut self,
315 attr: &Attribute<'src>,
316 warnings: &mut Vec<Warning<'src>>,
317 ) {
318 let attr_name = attr.name().data().to_lowercase();
319
320 // Verify that we have permission to overwrite any existing attribute value.
321 if let Some(existing_attr) = self.attribute_values.get(&attr_name)
322 && existing_attr.modification_context != ModificationContext::Anywhere
323 {
324 warnings.push(Warning {
325 source: attr.span(),
326 warning: WarningType::AttributeValueIsLocked(attr_name),
327 });
328 return;
329 }
330
331 let attribute_value = AttributeValue {
332 allowable_value: AllowableValue::Any,
333 modification_context: ModificationContext::Anywhere,
334 value: attr.value().clone(),
335 };
336
337 self.attribute_values.insert(attr_name, attribute_value);
338 }
339}
340
341impl Default for Parser {
342 fn default() -> Self {
343 Self {
344 attribute_values: built_in_attrs(),
345 default_attribute_values: built_in_default_values(),
346 renderer: Rc::new(HtmlSubstitutionRenderer {}),
347 path_resolver: PathResolver::default(),
348 }
349 }
350}
351
352fn built_in_attrs() -> HashMap<String, AttributeValue> {
353 let mut attrs: HashMap<String, AttributeValue> = HashMap::new();
354
355 attrs.insert(
356 "empty".to_owned(),
357 AttributeValue {
358 allowable_value: AllowableValue::Any,
359 modification_context: ModificationContext::ApiOnly,
360 value: InterpretedValue::Value("".into()),
361 },
362 );
363
364 attrs.insert(
365 "sp".to_owned(),
366 AttributeValue {
367 allowable_value: AllowableValue::Any,
368 modification_context: ModificationContext::ApiOnly,
369 value: InterpretedValue::Value(" ".into()),
370 },
371 );
372
373 attrs.insert(
374 "deg".to_owned(),
375 AttributeValue {
376 allowable_value: AllowableValue::Any,
377 modification_context: ModificationContext::ApiOnly,
378 value: InterpretedValue::Value("°".into()),
379 },
380 );
381
382 attrs.insert(
383 "plus".to_owned(),
384 AttributeValue {
385 allowable_value: AllowableValue::Any,
386 modification_context: ModificationContext::ApiOnly,
387 value: InterpretedValue::Value("+".into()),
388 },
389 );
390
391 attrs.insert(
392 "toc".to_owned(),
393 AttributeValue {
394 allowable_value: AllowableValue::Any,
395 modification_context: ModificationContext::ApiOrHeader,
396 value: InterpretedValue::Unset,
397 },
398 );
399
400 attrs.insert(
401 "sectids".to_owned(),
402 AttributeValue {
403 allowable_value: AllowableValue::Empty,
404 modification_context: ModificationContext::Anywhere,
405 value: InterpretedValue::Set,
406 },
407 );
408
409 attrs.insert(
410 "example-caption".to_owned(),
411 AttributeValue {
412 allowable_value: AllowableValue::Any,
413 modification_context: ModificationContext::Anywhere,
414 value: InterpretedValue::Set,
415 },
416 );
417
418 // TO DO: Replace ./images with value of imagesdir if that is non-default.
419 attrs.insert(
420 "iconsdir".to_owned(),
421 AttributeValue {
422 allowable_value: AllowableValue::Any,
423 modification_context: ModificationContext::Anywhere,
424 value: InterpretedValue::Set,
425 },
426 );
427
428 attrs
429}
430
/// Builds the table of fallback text for attributes that are merely "set"
/// (present without an explicit value), keyed by attribute name.
fn built_in_default_values() -> HashMap<String, String> {
    [
        ("example-caption", "Example"),
        ("iconsdir", "./images/icons"),
        ("sectnums", "all"),
        ("toc", "auto"),
    ]
    .into_iter()
    .map(|(name, text)| (name.to_owned(), text.to_owned()))
    .collect()
}