asciidoc_parser/parser/parser.rs
1use std::{collections::HashMap, rc::Rc};
2
3use crate::{
4 Document, HasSpan,
5 document::{Attribute, InterpretedValue},
6 parser::{
7 AllowableValue, AttributeValue, HtmlSubstitutionRenderer, IncludeFileHandler,
8 InlineSubstitutionRenderer, ModificationContext, PathResolver, preprocessor::preprocess,
9 },
10 warnings::{Warning, WarningType},
11};
12
13/// The [`Parser`] struct and its related structs allow a caller to configure
14/// how AsciiDoc parsing occurs and then to initiate the parsing process.
15#[derive(Clone, Debug)]
16pub struct Parser {
17 /// Attribute values at current state of parsing.
18 pub(crate) attribute_values: HashMap<String, AttributeValue>,
19
20 /// Default values for attributes if "set."
21 default_attribute_values: HashMap<String, String>,
22
23 /// Specifies how the basic raw text of a simple block will be converted to
24 /// the format which will ultimately be presented in the final output.
25 ///
26 /// Typically this is an [`HtmlSubstitutionRenderer`] but clients may
27 /// provide alternative implementations.
28 pub(crate) renderer: Rc<dyn InlineSubstitutionRenderer>,
29
30 /// Specifies the name of the primary file to be parsed.
31 pub(crate) primary_file_name: Option<String>,
32
33 /// Specifies how to generate clean and secure paths relative to the parsing
34 /// context.
35 pub path_resolver: PathResolver,
36
37 /// Handler for resolving include:: directives.
38 pub(crate) include_file_handler: Option<Rc<dyn IncludeFileHandler>>,
39}
40
41impl Default for Parser {
42 fn default() -> Self {
43 Self {
44 attribute_values: built_in_attrs(),
45 default_attribute_values: built_in_default_values(),
46 renderer: Rc::new(HtmlSubstitutionRenderer {}),
47 primary_file_name: None,
48 path_resolver: PathResolver::default(),
49 include_file_handler: None,
50 }
51 }
52}
53
54impl Parser {
55 /// Parse a UTF-8 string as an AsciiDoc document.
56 ///
57 /// The [`Document`] data structure returned by this call has a '`static`
58 /// lifetime; this is an implementation detail. It retains a copy of the
59 /// `source` string that was passed in, but it is not tied to the lifetime
60 /// of that string.
61 ///
62 /// Nearly all of the data structures contained within the [`Document`]
63 /// structure are tied to the lifetime of the document and have a `'src`
64 /// lifetime to signal their dependency on the source document.
65 ///
66 /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
67 /// encoding is allowed if a byte-order-mark (BOM) is present at the
68 /// start of a file. This format is not directly supported by the
69 /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
70 /// UTF-8 prior to parsing.
71 ///
72 /// The `Parser` struct will be updated with document attribute values
73 /// discovered during parsing. These values may be inspected using
74 /// [`attribute_value()`].
75 ///
76 /// # Warnings, not errors
77 ///
78 /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
79 /// return an [`Option`] or [`Result`] data type. There may be any number of
80 /// character sequences that have ambiguous or potentially unintended
81 /// meanings. For that reason, a caller is advised to review the warnings
82 /// provided via the [`warnings()`] iterator.
83 ///
84 /// [`warnings()`]: Document::warnings
85 /// [`attribute_value()`]: Self::attribute_value
86 pub fn parse(&mut self, source: &str) -> Document<'static> {
87 let (preprocessed_source, source_map) = preprocess(source, self);
88 Document::parse(&preprocessed_source, source_map, self)
89 }
90
91 /// Retrieves the current interpreted value of a [document attribute].
92 ///
93 /// Each document holds a set of name-value pairs called document
94 /// attributes. These attributes provide a means of configuring the AsciiDoc
95 /// processor, declaring document metadata, and defining reusable content.
96 /// This page introduces document attributes and answers some questions
97 /// about the terminology used when referring to them.
98 ///
99 /// ## What are document attributes?
100 ///
101 /// Document attributes are effectively document-scoped variables for the
102 /// AsciiDoc language. The AsciiDoc language defines a set of built-in
103 /// attributes, and also allows the author (or extensions) to define
104 /// additional document attributes, which may replace built-in attributes
105 /// when permitted.
106 ///
107 /// Built-in attributes either provide access to read-only information about
108 /// the document and its environment or allow the author to configure
109 /// behavior of the AsciiDoc processor for a whole document or select
110 /// regions. Built-in attributes are effectively unordered. User-defined
111 /// attribute serve as a powerful text replacement tool. User-defined
112 /// attributes are stored in the order in which they are defined.
113 ///
114 /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
115 pub fn attribute_value<N: AsRef<str>>(&self, name: N) -> InterpretedValue {
116 self.attribute_values
117 .get(name.as_ref())
118 .map(|av| av.value.clone())
119 .map(|av| {
120 if let InterpretedValue::Set = av
121 && let Some(default) = self.default_attribute_values.get(name.as_ref())
122 {
123 InterpretedValue::Value(default.clone())
124 } else {
125 av
126 }
127 })
128 .unwrap_or(InterpretedValue::Unset)
129 }
130
131 /// Returns `true` if the parser has a [document attribute] by this name.
132 ///
133 /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
134 pub fn has_attribute<N: AsRef<str>>(&self, name: N) -> bool {
135 self.attribute_values.contains_key(name.as_ref())
136 }
137
138 /// Returns `true` if the parser has a [document attribute] by this name
139 /// which has been set (i.e. is present and not [unset]).
140 ///
141 /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
142 /// [unset]: https://docs.asciidoctor.org/asciidoc/latest/attributes/unset-attributes/
143 pub fn is_attribute_set<N: AsRef<str>>(&self, name: N) -> bool {
144 self.attribute_values
145 .get(name.as_ref())
146 .map(|a| a.value != InterpretedValue::Unset)
147 .unwrap_or(false)
148 }
149
150 /// Sets the value of an [intrinsic attribute].
151 ///
152 /// Intrinsic attributes are set automatically by the processor. These
153 /// attributes provide information about the document being processed (e.g.,
154 /// `docfile`), the security mode under which the processor is running
155 /// (e.g., `safe-mode-name`), and information about the user’s environment
156 /// (e.g., `user-home`).
157 ///
158 /// The [`modification_context`](ModificationContext) establishes whether
159 /// the value can be subsequently modified by the document header and/or in
160 /// the document body.
161 ///
162 /// Subsequent calls to this function or [`with_intrinsic_attribute_bool()`]
163 /// are always permitted. The last such call for any given attribute name
164 /// takes precendence.
165 ///
166 /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
167 ///
168 /// [`with_intrinsic_attribute_bool()`]: Self::with_intrinsic_attribute_bool
169 pub fn with_intrinsic_attribute<N: AsRef<str>, V: AsRef<str>>(
170 mut self,
171 name: N,
172 value: V,
173 modification_context: ModificationContext,
174 ) -> Self {
175 let attribute_value = AttributeValue {
176 allowable_value: AllowableValue::Any,
177 modification_context,
178 value: InterpretedValue::Value(value.as_ref().to_string()),
179 };
180
181 self.attribute_values
182 .insert(name.as_ref().to_lowercase(), attribute_value);
183
184 self
185 }
186
187 /* Comment out until we're prepared to use and test this.
188 /// Sets the default value for an [intrinsic attribute].
189 ///
190 /// Default values for attributes are provided automatically by the
    /// processor. These values provide a fallback textual value for an
192 /// attribute when it is merely "set" by the document via API, header, or
193 /// document body.
194 ///
195 /// Calling this does not imply that the value is set automatically by
196 /// default, nor does it establish any policy for where the value may be
197 /// modified. For that, please use [`with_intrinsic_attribute`].
198 ///
199 /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
200 /// [`with_intrinsic_attribute`]: Self::with_intrinsic_attribute
201 pub fn with_default_attribute_value<N: AsRef<str>, V: AsRef<str>>(
202 mut self,
203 name: N,
204 value: V,
205 ) -> Self {
206 self.default_attribute_values
207 .insert(name.as_ref().to_string(), value.as_ref().to_string());
208
209 self
210 }
211 */
212
213 /// Sets the value of an [intrinsic attribute] from a boolean flag.
214 ///
215 /// A boolean `true` is interpreted as "set." A boolean `false` is
216 /// interpreted as "unset."
217 ///
218 /// Intrinsic attributes are set automatically by the processor. These
219 /// attributes provide information about the document being processed (e.g.,
220 /// `docfile`), the security mode under which the processor is running
221 /// (e.g., `safe-mode-name`), and information about the user’s environment
222 /// (e.g., `user-home`).
223 ///
224 /// The [`modification_context`](ModificationContext) establishes whether
225 /// the value can be subsequently modified by the document header and/or in
226 /// the document body.
227 ///
228 /// Subsequent calls to this function or [`with_intrinsic_attribute()`] are
229 /// always permitted. The last such call for any given attribute name takes
230 /// precendence.
231 ///
232 /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
233 ///
234 /// [`with_intrinsic_attribute()`]: Self::with_intrinsic_attribute
235 pub fn with_intrinsic_attribute_bool<N: AsRef<str>>(
236 mut self,
237 name: N,
238 value: bool,
239 modification_context: ModificationContext,
240 ) -> Self {
241 let attribute_value = AttributeValue {
242 allowable_value: AllowableValue::Any,
243 modification_context,
244 value: if value {
245 InterpretedValue::Set
246 } else {
247 InterpretedValue::Unset
248 },
249 };
250
251 self.attribute_values
252 .insert(name.as_ref().to_lowercase(), attribute_value);
253
254 self
255 }
256
257 /// Replace the default [`InlineSubstitutionRenderer`] for this parser.
258 ///
259 /// The default implementation of [`InlineSubstitutionRenderer`] that is
260 /// provided is suitable for HTML5 rendering. If you are targeting a
261 /// different back-end rendering, you will need to provide your own
262 /// implementation and set it using this call before parsing.
263 pub fn with_inline_substitution_renderer<ISR: InlineSubstitutionRenderer + 'static>(
264 mut self,
265 renderer: ISR,
266 ) -> Self {
267 self.renderer = Rc::new(renderer);
268 self
269 }
270
271 /// Sets the name of the primary file to be parsed when [`parse()`] is
272 /// called.
273 ///
274 /// This name will be used for any error messages detected in this file and
275 /// also will be passed to [`IncludeFileHandler::resolve_target()`] as the
276 /// `source` argument for any `include::` file resolution requests from this
277 /// file.
278 ///
279 /// [`parse()`]: Self::parse
280 /// [`IncludeFileHandler::resolve_target()`]: crate::parser::IncludeFileHandler::resolve_target
281 pub fn with_primary_file_name<S: AsRef<str>>(mut self, name: S) -> Self {
282 self.primary_file_name = Some(name.as_ref().to_owned());
283 self
284 }
285
286 /// Sets the [`IncludeFileHandler`] for this parser.
287 ///
288 /// The include file handler is responsible for resolving `include::`
289 /// directives encountered during preprocessing. If no handler is provided,
290 /// include directives will be ignored.
291 ///
292 /// [`IncludeFileHandler`]: crate::parser::IncludeFileHandler
293 pub fn with_include_file_handler<IFH: IncludeFileHandler + 'static>(
294 mut self,
295 handler: IFH,
296 ) -> Self {
297 self.include_file_handler = Some(Rc::new(handler));
298 self
299 }
300
301 /// Called from [`Header::parse()`] to accept or reject an attribute value.
302 pub(crate) fn set_attribute_from_header<'src>(
303 &mut self,
304 attr: &Attribute<'src>,
305 warnings: &mut Vec<Warning<'src>>,
306 ) {
307 let attr_name = attr.name().data().to_lowercase();
308
309 let existing_attr = self.attribute_values.get(&attr_name);
310
311 // Verify that we have permission to overwrite any existing attribute value.
312 if let Some(existing_attr) = existing_attr
313 && existing_attr.modification_context == ModificationContext::ApiOnly
314 {
315 warnings.push(Warning {
316 source: attr.span(),
317 warning: WarningType::AttributeValueIsLocked(attr_name),
318 });
319 return;
320 }
321
322 let mut value = attr.value().clone();
323
324 if let InterpretedValue::Set = value
325 && let Some(default_value) = self.default_attribute_values.get(&attr_name)
326 {
327 value = InterpretedValue::Value(default_value.clone());
328 }
329
330 let attribute_value = AttributeValue {
331 allowable_value: AllowableValue::Any,
332 modification_context: ModificationContext::Anywhere,
333 value,
334 };
335
336 self.attribute_values.insert(attr_name, attribute_value);
337 }
338
339 /// Called from [`Header::parse()`] for a value that is derived from parsing
340 /// the header (except for attribute lines).
341 pub(crate) fn set_attribute_by_value_from_header<N: AsRef<str>, V: AsRef<str>>(
342 &mut self,
343 name: N,
344 value: V,
345 ) {
346 let attr_name = name.as_ref().to_lowercase();
347
348 let attribute_value = AttributeValue {
349 allowable_value: AllowableValue::Any,
350 modification_context: ModificationContext::Anywhere,
351 value: InterpretedValue::Value(value.as_ref().to_owned()),
352 };
353
354 self.attribute_values.insert(attr_name, attribute_value);
355 }
356
357 /// Called from [`Block::parse()`] to accept or reject an attribute value
358 /// from a document (body) attribute.
359 pub(crate) fn set_attribute_from_body<'src>(
360 &mut self,
361 attr: &Attribute<'src>,
362 warnings: &mut Vec<Warning<'src>>,
363 ) {
364 let attr_name = attr.name().data().to_lowercase();
365
366 // Verify that we have permission to overwrite any existing attribute value.
367 if let Some(existing_attr) = self.attribute_values.get(&attr_name)
368 && existing_attr.modification_context != ModificationContext::Anywhere
369 {
370 warnings.push(Warning {
371 source: attr.span(),
372 warning: WarningType::AttributeValueIsLocked(attr_name),
373 });
374 return;
375 }
376
377 let attribute_value = AttributeValue {
378 allowable_value: AllowableValue::Any,
379 modification_context: ModificationContext::Anywhere,
380 value: attr.value().clone(),
381 };
382
383 self.attribute_values.insert(attr_name, attribute_value);
384 }
385}
386
387fn built_in_attrs() -> HashMap<String, AttributeValue> {
388 let mut attrs: HashMap<String, AttributeValue> = HashMap::new();
389
390 attrs.insert(
391 "empty".to_owned(),
392 AttributeValue {
393 allowable_value: AllowableValue::Any,
394 modification_context: ModificationContext::ApiOnly,
395 value: InterpretedValue::Value("".into()),
396 },
397 );
398
399 attrs.insert(
400 "sp".to_owned(),
401 AttributeValue {
402 allowable_value: AllowableValue::Any,
403 modification_context: ModificationContext::ApiOnly,
404 value: InterpretedValue::Value(" ".into()),
405 },
406 );
407
408 attrs.insert(
409 "deg".to_owned(),
410 AttributeValue {
411 allowable_value: AllowableValue::Any,
412 modification_context: ModificationContext::ApiOnly,
413 value: InterpretedValue::Value("°".into()),
414 },
415 );
416
417 attrs.insert(
418 "plus".to_owned(),
419 AttributeValue {
420 allowable_value: AllowableValue::Any,
421 modification_context: ModificationContext::ApiOnly,
422 value: InterpretedValue::Value("+".into()),
423 },
424 );
425
426 attrs.insert(
427 "toc".to_owned(),
428 AttributeValue {
429 allowable_value: AllowableValue::Any,
430 modification_context: ModificationContext::ApiOrHeader,
431 value: InterpretedValue::Unset,
432 },
433 );
434
435 attrs.insert(
436 "sectids".to_owned(),
437 AttributeValue {
438 allowable_value: AllowableValue::Empty,
439 modification_context: ModificationContext::Anywhere,
440 value: InterpretedValue::Set,
441 },
442 );
443
444 attrs.insert(
445 "example-caption".to_owned(),
446 AttributeValue {
447 allowable_value: AllowableValue::Any,
448 modification_context: ModificationContext::Anywhere,
449 value: InterpretedValue::Set,
450 },
451 );
452
453 // TO DO: Replace ./images with value of imagesdir if that is non-default.
454 attrs.insert(
455 "iconsdir".to_owned(),
456 AttributeValue {
457 allowable_value: AllowableValue::Any,
458 modification_context: ModificationContext::Anywhere,
459 value: InterpretedValue::Set,
460 },
461 );
462
463 attrs
464}
465
/// Builds the table of default textual values, keyed by attribute name.
///
/// A default is the text substituted for an attribute that is merely "set"
/// without an explicit value of its own.
fn built_in_default_values() -> HashMap<String, String> {
    [
        ("example-caption", "Example"),
        ("iconsdir", "./images/icons"),
        ("sectnums", "all"),
        ("toc", "auto"),
    ]
    .into_iter()
    .map(|(name, default)| (name.to_owned(), default.to_owned()))
    .collect()
}